-
Notifications
You must be signed in to change notification settings - Fork 0
/
rbhash.cpppp
957 lines (901 loc) · 36.7 KB
/
rbhash.cpppp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
## param $namespace = 'rbhash';
## param $min_bits = 8;
## param $max_bits = 64;
## param @default_search_args= ('int (*cmp_callback)(void *, size_t)', 'void *userdata');
## param $default_search_cmp = "cmp_callback(userdata, node_id)";
## param @treeprint_args;
## param $debug= 0;
## param $feature_print= 1;
## param $feature_demo= 0;
## param $run_with_scissors= 0;
## param @public_includes = (
## qw( <stdint.h> <stdlib.h> <stdbool.h> <assert.h> ),
## qw( <stdio.h> <string.h> )x!!($feature_print || $feature_demo)
## );
## param @private_includes = qw( "rbhash.h" );
## my $NAMESPACE= uc($namespace);
## my @bits= map +(1<<$_), (log($min_bits)/log(2)) .. (log($max_bits)/log(2));
## sub log2($x) { log($x)/log(2) }
## sub word_type($bits) { 'uint'.$bits.'_t' }
## section PUBLIC;
## use CodeGen::Cpppp::Template "format_commandline";
/*
Generated by rbhash.cpppp using command
${{ format_commandline }}
*/
#include $_ ## for @public_includes
## section PRIVATE;
/*
Generated by rbhash.cpppp using command
${{ format_commandline }}
*/
#include $_ ## for @private_includes
// A setting that disables all the runtime sanity checks and safeguards
#ifndef ${NAMESPACE}_RUN_WITH_SCISSORS
#define ${NAMESPACE}_RUN_WITH_SCISSORS $run_with_scissors
#endif
#ifndef ${NAMESPACE}_ASSERT
/* The assertions of this library are fairly important since so much of the
* implementation is exposed to the rest of the program, so only actually
* remove the checks if RUN_WITH_SCISSORS is set.
*/
#if ${NAMESPACE}_RUN_WITH_SCISSORS
#define ${NAMESPACE}_ASSERT(x) (void)0
#elif defined(NDEBUG)
#define ${NAMESPACE}_ASSERT(x) if (!(x)) return 0
#else
#define ${NAMESPACE}_ASSERT(x) assert(x)
#endif
#endif
## my $assert= "${NAMESPACE}_ASSERT";
## section PUBLIC;
/* MAX_TREE_HEIGHT is the maximum number of nodes from root to leaf in any
* correctly balanced tree. The exact formula for the maximum height (including
* root node) is floor(2*log2(N/2+1)) for a tree of N nodes.
*/
## for my $bits (@bits) {
#define ${NAMESPACE}_MAX_ELEMENTS_$bits 0x${{ sprintf "%X", (1<<($bits-1))-1 }}
#define ${NAMESPACE}_MAX_TREE_HEIGHT_$bits ${{ int(2*log2((2**($bits-1)-1)/2+1)) }}
## }
/* This macro tells you the word offset (treating rbhash as an array of words)
* of the first hash bucket.
*/
#define ${NAMESPACE}_TABLE_WORD_OFS(capacity) ( (capacity)*2 + 2 )
/* This macro selects the word size needed to index 'capacity' number of
* user elements.
*/
#define ${NAMESPACE}_SIZEOF_WORD(capacity) ( \
## for my $bits (@bits) {
## if ($bits < $max_bits) {
(capacity) <= ${NAMESPACE}_MAX_ELEMENTS_$bits? ${{ $bits/8 }} : \
## } else {
${{ $bits/8 }} \
## }
## }
)
/* This macro defines the total size (in bytes) of the rbhash storage
* for a given number of elements and buckets. This does not include
* the user's elements themselves, since those are whatever size the
* user wants them to be, and rbhash doesn't need to know.
*/
#define ${NAMESPACE}_SIZEOF(capacity, buckets) ( \
${NAMESPACE}_SIZEOF_WORD(capacity) \
* ( ${NAMESPACE}_TABLE_WORD_OFS(capacity) + buckets ) \
)
/* Several functions can operate on a "path", which is a list of
* references starting at the bucket and ending at a tree node.
* The path is allocated to the maximum depth that a tree of that
* word-bits-size could reach. Since this drastically affects the
* amount of stack used, a struct is declared for each word-bit size.
*
* The structs each record their length so that they can be passed
* interchangably to the functions. You could even allocate custom
* lengths with alloca, but that seems overcomplicated.
*/
## for my $bits (@bits) {
struct ${namespace}_path_${bits} {
uint8_t len, lim;
size_t refs[${NAMESPACE}_MAX_TREE_HEIGHT_${bits}];
};
## section PRIVATE;
void ${namespace}_path_${bits}_init(struct ${namespace}_path_${bits} *p);
## section PUBLIC;
inline void ${namespace}_path_${bits}_init(struct ${namespace}_path_${bits} *p) {
p->len= 0;
p->lim= ${NAMESPACE}_MAX_TREE_HEIGHT_${bits};
}
## }
// Different template output may end up with different structs claiming
// the name of ${namespace}_path, but that should be OK.
typedef struct ${namespace}_path_${max_bits} ${namespace}_path;
#define ${namespace}_path_init(p) ${namespace}_path_${max_bits}_init(p)
// Iterate one or more places through the R/B tree of a bucket, updating 'path'
extern size_t ${namespace}_path_step(void *rbhash, size_t capacity, ${namespace}_path *path, int ofs);
// Exchange the tree node of one node_id (at the end of 'path') for another node_id
extern size_t ${namespace}_path_swap(void *rbhash, size_t capacity, ${namespace}_path *path, size_t new_node_id);
// implementation detail used to reduce size of inline functions
extern size_t ${namespace}_capacity_bounds_assertion(size_t capacity);
// Add a node_id at the end of 'path', and balance the tree if needed
## for my $bits (@bits) {
## my $word_t= word_type($bits);
extern size_t ${namespace}_path_insert_$bits($word_t *rbhash, ${namespace}_path *path, size_t node_id);
## }
inline size_t ${namespace}_path_insert(void *rbhash, size_t capacity, ${namespace}_path *path, size_t node) {
return
## for my $bits (@bits) {
## my $word_t= word_type($bits);
(capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits)? ${namespace}_path_insert_$bits(($word_t*) rbhash, path, node) :
## }
${namespace}_capacity_bounds_assertion(capacity);
}
// Remove the node_id from the end of 'path', and balance the tree if needed
## for my $bits (@bits) {
## my $word_t= word_type($bits);
extern size_t ${namespace}_path_delete_$bits($word_t *rbhash, ${namespace}_path *path);
## }
inline size_t ${namespace}_path_delete(void *rbhash, size_t capacity, ${namespace}_path *path) {
## for my $bits (@bits) {
## my $word_t= word_type($bits);
(capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits)? ${namespace}_path_delete_$bits(($word_t*) rbhash, path) :
## }
${namespace}_capacity_bounds_assertion(capacity);
}
## # Define a search API for a custom comparison function
## # Parameters:
## # sub_ns: namespace component to be appended to the rbhash namespace
## # api_args: arrayref of C function parameters (strings) added to each search function
## # cmp: a string of C that calls a function to compare node_id with the search_args
## # header: if set, emits the API declarations into that named section. default is 'public'
## # unit: if set, emits the implementation into that named section. default is the current section.
##
## sub define_search_api(%options) {
## my $search_ns= $namespace;
## $search_ns .= '_' . $options{sub_ns} if length($options{sub_ns}//'');
## my @search_args= $options{api_args} or die "'search_args' are required\n";
## my $search_cmp= $options{cmp} or die "'cmp' is required\n";
##
## my $orig_section= $self->current_output_section;
## section($options{header} || PUBLIC);
// Find a node_id matching the search criteria and fill in the 'path' to it, else return 0
extern size_t ${search_ns}_find_path(void *rbhash, size_t capacity, ${namespace}_path *path, size_t bucket_idx, @search_args);
// Simplified search for node_id, without building a path
extern size_t ${search_ns}_find(void *rbhash, size_t capacity, size_t bucket_idx, @search_args);
// Insert a node_id, unless one already matches the search criteria
extern size_t ${search_ns}_insert(void *rbhash, size_t capacity, size_t node_id, size_t bucket_idx, @search_args);
// Delete a node matching the search criteria
extern size_t ${search_ns}_delete(void *rbhash, size_t capacity, size_t bucket_idx, @search_args);
## section($options{unit} || $orig_section);
/* Find a node in the hash table, or tree. Returns the node_id, or 0 if no
* nodes match.
*
* This is a simplified version of find_path that doesn't keep track of the
* path through the tree, saving time but not facilitating inserts or deletes.
*/
size_t ${search_ns}_find(
void *rbhash, size_t capacity, size_t bucket_idx,
@search_args
) {
size_t node_id= 0;
int cmp;
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $else= $bits > $min_bits? ' else':'';
$else if (capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits) {
node_id= (($word_t *)rbhash)[ ${NAMESPACE}_TABLE_WORD_OFS(capacity) + bucket_idx ] >> 1;
while (node_id && (cmp= $search_cmp))
node_id= (($word_t *)rbhash)[ (node_id<<1) | (cmp < 0? 0 : 1) ] >> 1;
}
## }
else {
$assert(capacity <= ${NAMESPACE}_MAX_ELEMENTS_$max_bits);
}
return node_id;
}
/* Find a node in the hash table, and record the path to arrive at the node
* or the node pointer where it would exist. The path can be used for
* inserting or deleting without re-comparing any elements.
*
* The path should already have been initialized using `${namespace}_path_init`.
*/
size_t ${search_ns}_find_path(
void *rbhash, size_t capacity, ${namespace}_path *path, size_t bucket_idx,
@search_args
) {
size_t ref, node_id= 0;
int cmp, p_i= 0, p_lim= path->lim;
path->len= 0; // in case $assert calls 'return 0'
$assert(p_lim > 0);
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $else= $bits > $min_bits? ' else':'';
$else if (capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits) {
$word_t *rbhash_w= ($word_t*) rbhash;
path->refs[0]= ${NAMESPACE}_TABLE_WORD_OFS(capacity) + bucket_idx;
node_id= rbhash_w[ path->refs[0] ] >> 1;
while (node_id && (cmp= $search_cmp)) {
ref= (node_id<<1) | (cmp < 0? 0 : 1);
++p_i;
$assert(p_i < p_lim);
path->refs[p_i]= ref;
node_id= rbhash_w[ref] >> 1;
}
}
## }
else {
$assert(capacity <= ${NAMESPACE}_MAX_ELEMENTS_$max_bits);
}
path->len= p_i+1;
return node_id;
}
/* Insert a node into the hashtable, storing collisions in a tree.
* If it finds a node with same key, it returns that index and does not insert
* the new node, else it will insert and return your 'new_node' value.
* If it returns node 0, you have a corrupted data structure.
*/
extern size_t ${search_ns}_insert(
void *rbhash, size_t capacity, size_t at_node_id, size_t bucket_idx,
@search_args
) {
size_t node_id= 0, ref= ${NAMESPACE}_TABLE_WORD_OFS(capacity) + bucket_idx;
int cmp, p_i= 0, p_lim;
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $else= $bits > $min_bits? ' else':'';
$else if (capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits) {
$word_t *rbhash_w= ($word_t*) rbhash;
node_id= rbhash_w[ref] >> 1;
if (!node_id) {
rbhash_w[ref]= at_node_id << 1;
return at_node_id;
}
else {
struct ${namespace}_path_${bits} path;
${namespace}_path_${bits}_init(&path);
p_lim= path.lim;
path.refs[0]= ref;
do {
if (!(cmp= $search_cmp))
return node_id;
ref= (node_id<<1) | (cmp < 0? 0 : 1);
++p_i;
$assert(p_i < p_lim);
path.refs[p_i]= ref;
node_id= rbhash_w[ref] >> 1;
} while (node_id);
node_id= at_node_id;
// Handle simple case of adding to black parent without invoking balance.
if (!(rbhash_w[path.refs[p_i-1]] & 1)) {
rbhash_w[ref]= (node_id << 1) | 1;
return node_id;
}
path.len= p_i+1;
return ${namespace}_path_insert_$bits(rbhash_w, (${namespace}_path*) &path, node_id);
}
}
## }
$assert(capacity <= ${NAMESPACE}_MAX_ELEMENTS_$max_bits);
return 0;
}
/* Find and delete a node in the hashtable. If found, this returns the node_id
* that was removed. If not found (or if the data structure is currupt) this
* returns 0. It may also return 0 if an assertion fails and you have disabled
* aborting on assertions.
*/
extern size_t ${search_ns}_delete(
void *rbhash, size_t capacity, size_t bucket_idx,
@search_args
) {
size_t cur= 0, ref= ${NAMESPACE}_TABLE_WORD_OFS(capacity) + bucket_idx;
int cmp, p_i= 0, p_lim;
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $else= $bits > $min_bits? ' else':'';
$else if (capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits) {
$word_t *rbhash_w= ($word_t*) rbhash;
if ((cur= rbhash_w[ref])) {
struct ${namespace}_path_${bits} path;
${namespace}_path_${bits}_init(&path);
p_lim= path.lim;
path.refs[0]= ref;
#define node_id (cur >> 1)
while ((cmp= $search_cmp)) {
#undef node_id
ref= (cur|1) ^ (cmp < 0? 1 : 0);
cur= rbhash_w[ref];
if (!cur)
return 0;
++p_i;
$assert(p_i < p_lim);
path.refs[p_i]= ref;
}
path.len= p_i+1;
return ${namespace}_path_delete_$bits(rbhash_w, (${namespace}_path*) &path);
}
}
## }
$assert(capacity <= ${NAMESPACE}_MAX_ELEMENTS_${max_bits});
return 0;
}
## section $orig_section;
## } # end define_search_api
##
## section PRIVATE;
/* Only called when capacity is out of bounds. Used by the inline bit-selectors. */
extern size_t ${namespace}_capacity_bounds_assertion(size_t capacity) {
$assert(capacity <= ${NAMESPACE}_MAX_ELEMENTS_$max_bits);
return 0;
}
## define_search_api(
## api_args => \@default_search_args,
## cmp => $default_search_cmp,
## );
/* Given a path to a node, make that path point to a new node assuming that
* the new node contains the same search key that the old node used to.
* This is used when moving array elements to a new index where the NodeID
* would be different.
*
* This is a O(1) operation, though building the path probably took O(log N).
*
* Returns the NodeID that the path previously ended with, and the path has
* been modified to end with new_node_id and is still valid. May return
* 0 if an assertion fails and you have disabled assertions.
*/
extern size_t ${namespace}_path_swap(
void *rbhash, size_t capacity, ${namespace}_path *path, size_t new_node_id
) {
size_t ref;
/* path must point to a node, and new_node_id must not be the sentinel */
$assert(path->len > 0 && new_node_id);
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $nodeint_t= $bits < 64? 'uint'.($bits*2).'_t' : undef;
## my $else= $bits > $min_bits? ' else':'';
$else if (capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits) {
$word_t *rbhash_w= ($word_t*) rbhash, prev;
// It is an error if new_node_id is not already zeroed
## if ($nodeint_t) {
$assert((($nodeint_t*) rbhash)[new_node_id] == 0);
## } else {
$assert(rbhash_w[new_node_id << 1] == 0 && rbhash_w[(new_node_id << 1)|1] == 0);
## }
// Swap the references
ref= path->refs[path->len-1];
prev= rbhash_w[ref];
rbhash_w[ref]= (new_node_id << 1) | (prev&1);
## if ($nodeint_t) {
(($nodeint_t*) rbhash)[new_node_id]= (($nodeint_t*) rbhash)[prev>>1];
// and clear out the 'prev' before returning it
(($nodeint_t*) rbhash)[prev>>1]= 0;
## } else {
rbhash_w[new_node_id << 1]= rbhash_w[prev >> 1 << 1];
rbhash_w[(new_node_id << 1) | 1]= rbhash_w[prev|1];
// and clear out the 'prev' before returning it
rbhash_w[prev >> 1 << 1]= 0;
rbhash_w[prev|1]= 0;
## }
return prev >> 1;
}
## }
$assert(capacity <= ${NAMESPACE}_MAX_ELEMENTS_${max_bits});
return 0;
}
/* Insert a node_id at the end of a path which points to the Sentinel.
* The final ref will be updated to point to node_id, and then the tree
* will be balanced according to the Red/Black algorithm. The path is
* destroyed in the process and should not be used after this call.
* (the path could be updated during balance rotations, but would add
* overhead and users are unlikely to need it afterward anyway)
*
* Returns the node_id on success, or 0 on an assertion failure if you
* disabled assertions.
*/
## for my $bits (@bits) {
## my $word_t= word_type($bits);
extern size_t ${namespace}_path_insert_$bits(
$word_t *rbhash, ${namespace}_path *path, size_t node_id
) {
/*
Legend for deciphering crazy bitwise operations below:
X_ref - the index within the rbhash array which holds X
X= rbhash[X_ref] - an integer of ((node_id << 1) | red)
i.e. if pos is like a pointer with embedded color information,
pos_ref is like a pointer to that pointer.
X & 1 - 1 = red, 0 = black
X >> 1 - the node_id X is pointing to
If X_ref is from a tree node (true for all path->refs[i > 0]) then
the location of the ref also indicates which node it belongs to,
by virtue of nodes being located at rbhash[node_id*2].
X_ref >> 1 - the node_id of the parent of X
X_ref & 1 - true if X is the right-subtree of its parent
X_ref ^ 1 - a ref to the sibling of X
X | 1 - the ref to X's node_id's right subtree
X >> 1 << 1 - the ref to X's node_id's left subtree
(X|1) ^ 1 - same, maybe optimized if (X|1) is already in a register
X ^ 1 - a shortcut for one of the above when the color is known
*/
int p_i= path->len - 1;
// Empty paths or paths ending with a non-Sentinel reference are invalid.
$assert(path->len > 0 && rbhash[path->refs[p_i]] == 0);
// Add new_node to the final parent-ref of the path.
// If p_i is 0, this will be altering the hash bucket.
rbhash[path->refs[p_i--]]= (node_id << 1) | 1; // and make it red
// 'pos' will be the parent node of that.
while (p_i > 0) {
$word_t pos_ref= path->refs[p_i--];
$word_t pos= rbhash[pos_ref];
$word_t parent_ref= path->refs[p_i];
// if current is a black node, no rotations needed
if (!(pos & 1))
break;
// pos is red, its new child is red, and parent will be black.
// if the sibling is also red, we can pull down the color black from the parent
// if not, need a rotation.
if (!(rbhash[pos_ref^1]&1)) {
// Sibling is black, need a rotation
// if the imbalanced child (red node) is on the same side as the parent,
// need to rotate those lower nodes to the opposite side in preparation
// for the rotation.
// e.g. if pos_ref is leftward (even) and pos's rightward child (odd) is the red one...
$word_t child_ref= pos ^ (pos_ref&1);
$word_t child= rbhash[child_ref];
if (child&1) {
// rotate pos toward [side] so parent's [side] now points to pos's [otherside]
// set pos's child-ref to child's [otherside] ref
$word_t near_grandchild_ref= child ^ (child_ref&1);
rbhash[child_ref]= rbhash[near_grandchild_ref];
// set child's [side] to pos
rbhash[near_grandchild_ref]= pos;
pos= child; // keep pos as a red node, soon to become black
rbhash[pos_ref]= child;
// parent's [side] has not been updated here, but is about to become 'child'
child_ref= near_grandchild_ref^1;
child= rbhash[child_ref];
}
// Now we can rotate toward parent to balance the tree.
rbhash[pos_ref]= child;
rbhash[child_ref]= pos_ref|1; // = parent, colored red. simplification of ((pos_ref>>1)<<1)|1
rbhash[parent_ref]= pos^1; // also make pos black
// rotation finished, exit.
break;
}
rbhash[pos_ref^1] ^= 1; // toggle color of sibling
rbhash[pos_ref]= pos^1; // toggle color of pos
rbhash[parent_ref] ^= 1; // toggle color of parent
// Now pos is black.
// Jump twice up the tree so that once again, pos has one red child.
p_i--;
}
// Root of tree is always black
if (rbhash[path->refs[0]] & 1)
rbhash[path->refs[0]] ^= 1;
#if !${NAMESPACE}_RUN_WITH_SCISSORS
// Path is no longer valid, because rotations may have destroyed it.
path->len= 0;
#endif
return node_id;
}
## }
/* Delete a node at the end of a path. The path must end with a non-Sentinel
* reference, and must also be allocated to the maximum height of the tree,
* because the node you want to delete might need to be replaced by a node
* deeper in the tree. The tree will be re-balanced using the Red/Black
* algorithm. If this is the last node in the tree, it clears the hash bucket.
*
* The path is destroyed in the process, as rotations and node-replacement
* occur. You may not use the path afterward, even if the function fails.
* (the path could be updated during balance rotations, but would add
* overhead and users are unlikely to need it afterward anyway)
*
* Returns the deleted node_id on success, or 0 on an assertion failure if
* you disabled assertions.
*/
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $nodeint_t= $bits < 64? 'uint'.($bits*2).'_t' : undef;
## my sub clear_lsb($expr) { $expr= "($expr)" if $expr =~ /\W/; "($expr >> 1 << 1)" }
extern size_t ${namespace}_path_delete_$bits($word_t *rbhash, ${namespace}_path *path) {
// See ${namespace}_path_insert for the notes on the bitwise operations
$word_t pos, ch1, ch2, sibling;
int p_i= path->len-1, p_lim= path->lim;
size_t *parent_refs= path->refs, ref, pos_ref;
// Path should be at least 1 element (the bucket root ref)
$assert(path->len >= 1);
// Read the final ref to find 'pos_ref' and 'pos'
pos_ref= parent_refs[p_i];
pos= rbhash[pos_ref];
// Path must point to a non-sentinel node
$assert(pos != 0);
// If pos has children, find a leaf to swap with.
// Then delete this node in the leaf's position.
// Note that normal red/black would delete the element first, then swap, but if we do that
// a rotation could change the path->refs putting the node-to-delete somwhere else.
ch1= rbhash[pos], ch2= rbhash[pos ^ 1];
if (ch1 || ch2) {
if (ch1 && ch2) {
int orig_p_i= p_i;
$word_t alt= pos, alt2;
// Descend one level to the left.
// The path should always have room for this additional reference if it
// was allocated to max-tree-height and the tree is actually balanced.
++p_i;
$assert(p_i < p_lim);
parent_refs[p_i]= ref= ${{ clear_lsb('pos') }}; // go left;
alt= rbhash[ref]; // either ch1 or ch2, but now we know it's the left one
// descend as many levels as possible to the right
while ((alt= rbhash[ref= alt | 1])) {
++p_i;
$assert(p_i < p_lim);
parent_refs[p_i]= ref;
}
// 'alt' is the node we swap with.
alt= rbhash[parent_refs[p_i]];
// is there one to the left?
if ((alt2= rbhash[${{ clear_lsb('alt') }}])) {
$assert(alt2 & 1);
// it is required to be a red leaf, so replace alt with it
rbhash[parent_refs[p_i]]= alt2 ^ 1;
## if ($nodeint_t) {
(($nodeint_t *)rbhash)[alt2 >> 1]= 0;
// Now substitute this for pos and we're done.
(($nodeint_t *)rbhash)[alt >> 1]= (($nodeint_t *)rbhash)[pos >> 1];
## } else {
rbhash[alt2]= 0;
rbhash[alt2 ^ 1]= 0;
// Now substitute this for pos and we're done.
rbhash[alt | 1]= rbhash[pos | 1];
rbhash[(alt | 1) ^ 1]= rbhash[(pos | 1) ^ 1];
## }
rbhash[pos_ref]= ${{ clear_lsb('alt') }} | (pos & 1); // preserve color of pos
goto done;
}
else {
// swap colors of alt and pos
alt ^= pos & 1;
pos ^= alt & 1;
alt ^= pos & 1;
## if ($nodeint_t) {
(($nodeint_t *)rbhash)[alt >> 1]= (($nodeint_t *)rbhash)[pos >> 1];
## } else {
rbhash[alt | 1]= rbhash[pos | 1]; // copy right
rbhash[(alt | 1) ^ 1]= rbhash[(pos | 1) ^ 1]; // copy left
## }
rbhash[pos_ref]= alt;
// the parent ref at orig_p_i+1 just changed address, so update that
// (and this affects the next line if alt was a child of pos)
parent_refs[orig_p_i + 1]= ${{ clear_lsb('alt') }}; // was left branch at that point
pos_ref= parent_refs[p_i];
}
}
else {
// Node is black with one child. Swap with it.
rbhash[pos_ref]= ${{ clear_lsb('ch1 | ch2') }}; // and make it black
goto done;
}
}
// Remove it.
rbhash[pos_ref]= 0;
// It was a black node with no children. Now it gets interesting.
if (!(pos & 1)) {
// The tree must have the same number of black nodes along any path from root
// to leaf. We want to remove a black node, disrupting the number of black
// nodes along the path from the root to the current leaf. To correct this,
// we must either reduce all other paths, or add a black node to the current
// path.
// Loop until the current node is red, or until we get to the root node.
sibling= rbhash[pos_ref ^ 1];
--p_i; // p_i is now the index of the ref to the parent
while (p_i >= 0) {
size_t near_nephew_ref;
$word_t near_nephew;
// If the sibling is red, we are unable to reduce the number of black
// nodes in the sibling tree, and we can't increase the number of black
// nodes in our tree.. Thus we must do a rotation from the sibling
// tree to our tree to give us some extra (red) nodes to play with.
// This is Case 1 from the text
if (sibling & 1) {
// Node is black and sibling is red. Get ref to sibling's near subtree
near_nephew_ref= (sibling ^ 1) | (pos_ref & 1);
// sibling is new parent, and now black.
rbhash[parent_refs[p_i]]= sibling ^ 1;
// move sibling's child under parent, becoming new sibling (which is black)
sibling= rbhash[near_nephew_ref];
rbhash[pos_ref ^ 1]= sibling;
rbhash[near_nephew_ref]= pos_ref | 1; // former sibling sameside tree = parent, now red
++p_i;
$assert(p_i < p_lim);
parent_refs[p_i] = near_nephew_ref; // insert new parent into list
}
// sibling will be black here
// If the sibling is black and both children are black, we have to
// reduce the black node count in the sibling's tree to match ours.
// This is Case 2a from the text.
near_nephew_ref= sibling | (pos_ref & 1);
near_nephew= rbhash[near_nephew_ref];
if (!((near_nephew|rbhash[near_nephew_ref ^ 1]) & 1)) {
$assert(sibling > 1);
rbhash[pos_ref ^ 1] |= 1; // change sibling to red
// Now we move one level up the tree to continue fixing the other branches
if (p_i < 1)
break;
pos_ref= parent_refs[p_i--];
if (rbhash[pos_ref] & 1) {
// Now, make the current node black (to fulfill Case 2b)
rbhash[pos_ref] ^= 1;
break;
}
sibling= rbhash[pos_ref ^ 1];
}
else {
// sibling will be black with 1 or 2 red children here
// If one of the sibling's children are red, we again can't make the
// sibling red to balance the tree at the parent, so we have to do a
// rotation. If the "near" nephew is red and the "far" nephew is
// black, we need to rotate that tree away before rotating the
// parent toward.
// After doing a rotation and rearranging a few colors, the effect is
// that we maintain the same number of black nodes per path on the far
// side of the parent, and we gain a black node on the current side,
// so we are done.
if (near_nephew & 1) {
// Case 3 from the text, double rotation
size_t tmp_ref= near_nephew ^ (pos_ref & 1); // near nephew's far child
rbhash[near_nephew_ref]= rbhash[tmp_ref];
rbhash[pos_ref ^ 1]= near_nephew;
rbhash[tmp_ref]= sibling;
sibling= near_nephew ^ 1; // make it black
near_nephew_ref= sibling | (pos_ref & 1);
}
else
rbhash[near_nephew_ref ^ 1] ^= 1; // far nephew becomes black
// now Case 4 from the text
$assert(sibling > 1);
rbhash[pos_ref ^ 1]= rbhash[near_nephew_ref];
// parent becomes black, balancing current path
rbhash[near_nephew_ref]= ${{ clear_lsb('pos_ref') }};
// Sibling assumes parent's color and position
rbhash[parent_refs[p_i]]= sibling | (rbhash[parent_refs[p_i]] & 1);
break;
}
}
}
done:
// Ensure root-ref is black
if (rbhash[parent_refs[0]] & 1)
rbhash[parent_refs[0]] ^= 1;
// clean the 'pos' node for future use
## if ($nodeint_t) {
(($nodeint_t *)rbhash)[pos >> 1]= 0;
## } else {
rbhash[pos]= 0;
rbhash[pos ^ 1]= 0;
## }
#if !${NAMESPACE}_RUN_WITH_SCISSORS
// Path is no longer valid, because rotations may have destroyed it.
path->len= 0;
#endif
return pos >> 1;
}
## }
## if ($feature_print) {
## section PUBLIC;
// Handy for gdb:
// p ${namespace}_print(rbhash, capacity, buckets, NULL, NULL, stdout)
extern void ${namespace}_print(void *rbhash, size_t capacity, size_t n_buckets,
void (*print_node)(void*,size_t,FILE*), void* userdata, FILE *out);
## section PRIVATE;
## for my $bits (@bits) {
## my $word_t= word_type($bits);
// Handy for gdb: "p ${namespace}_treeprint_$bits(rbhash, capacity, i, i, NULL, NULL, stdout)"
static size_t ${namespace}_print_tree_$bits(
$word_t *rbhash, $word_t max_node, $word_t node, $word_t mark_node,
void (*print_node)(void*,size_t,FILE*), void* userdata, FILE * out
) {
$word_t node_path[ 1+${NAMESPACE}_MAX_TREE_HEIGHT_$bits ];
bool cycle;
int i, pos, step= 0;
size_t nodecount= 0;
if (!node) {
fputs("(empty tree)\n", out);
return 0;
}
node_path[0]= 0;
node_path[pos= 1]= node << 1;
while (node && pos) {
switch (step) {
case 0:
// Check for cycles
cycle= false;
for (i= 1; i < pos; i++)
if ((node_path[i]>>1) == (node_path[pos]>>1))
cycle= true;
// Proceed down right subtree if possible
if (!cycle && pos < ${NAMESPACE}_MAX_TREE_HEIGHT_$bits
&& node <= max_node && rbhash[(node<<1)|1]
) {
node= rbhash[(node<<1)|1] >> 1;
node_path[++pos]= node << 1;
continue;
}
case 1:
// Print tree branches for nodes up until this one
for (i= 2; i < pos; i++)
fputs((node_path[i]&1) == (node_path[i+1]&1)? " " : " |", out);
if (pos > 1)
fputs((node_path[pos]&1)? " `" : " ,", out);
// Print content of this node
fprintf(out, "--%c%c%c #%ld%s ",
(node == mark_node? '(' : '-'),
(node > max_node? '!' : (rbhash[ (node_path[pos-1]|1) ^ (node_path[pos]&1) ]&1)? 'R':'B'),
(node == mark_node? ')' : ' '),
(long) node,
cycle? " CYCLE DETECTED"
: pos >= ${NAMESPACE}_MAX_TREE_HEIGHT_$bits? " MAX DEPTH EXCEEDED"
: node > max_node? " VALUE OUT OF BOUNDS"
: ""
);
if (print_node) print_node(userdata, node, out);
fputs("\n", out);
++nodecount;
// Proceed down left subtree if possible
if (!cycle && pos < ${NAMESPACE}_MAX_TREE_HEIGHT_$bits
&& node <= max_node && rbhash[node<<1]
) {
node= rbhash[node<<1] >> 1;
node_path[++pos]= (node << 1) | 1;
step= 0;
continue;
}
case 2:
// Return to parent
step= (node_path[pos]&1) + 1;
node= node_path[--pos] >> 1;
cycle= false;
}
}
return nodecount;
}
## }
void ${namespace}_print(
void *rbhash, size_t capacity, size_t n_buckets,
void (*print_node)(void*,size_t,FILE*), void* userdata, FILE *out
) {
size_t used= 0, collision= 0, empty=0, i;
fprintf(out, "# rbhash for capacity=%ld: %ld hash buckets, %ld bytes\n"
"--------------------\n",
(long) capacity, (long) n_buckets, (long) ${NAMESPACE}_SIZEOF(capacity, n_buckets));
## for my $bits (@bits) {
## my $word_t= word_type($bits);
## my $else= $bits > $min_bits? ' else':'';
$else if (capacity <= ${NAMESPACE}_MAX_ELEMENTS_$bits) {
$word_t *nodes= ($word_t*) rbhash;
$word_t *table= nodes + ${NAMESPACE}_TABLE_WORD_OFS(capacity);
for (i= 0; i < n_buckets; i++) {
if (table[i]) {
if (empty) {
fprintf(out, "(%ld empty buckets)\n", (long) empty);
empty= 0;
}
++used;
collision += ${namespace}_print_tree_$bits(rbhash, capacity, table[i]>>1, 0, print_node, userdata, out) - 1;
} else
++empty;
}
if (empty) {
fprintf(out, "(%ld empty buckets)\n", (long) empty);
empty= 0;
}
}
## }
fprintf(out, "--------------------\n"
"# used %ld/%ld buckets, %ld collisions\n",
(long) used, (long) n_buckets, (long) collision);
}
## }
## if ($feature_demo) {
struct userdata {
int *el;
int el_count, el_alloc;
int key;
};
int hash_function(int x) { return x; }
int cmp_el(void *data_p, size_t node) {
struct userdata *data= (struct userdata *) data_p;
return data->key < data->el[node-1]? -1 : data->key > data->el[node-1]? 1 : 0;
}
void print_node(void *data_p, size_t node, FILE *out) {
struct userdata *data= (struct userdata *) data_p;
fprintf(out, "%ld", (long) data->el[node-1]);
}
int userdata_insert(struct userdata *data, int value) {
int next= data->el_count;
size_t node_id;
data->key= value;
node_id= ${namespace}_insert(data->el + data->el_alloc, data->el_alloc,
next+1, hash_function(value) % data->el_alloc,
cmp_el, data
);
if (node_id == next+1) {
data->el[data->el_count++]= value;
return next;
}
return -1;
}
void userdata_extend(struct userdata *data) {
int i, lim, n= data->el_alloc? data->el_alloc << 1 : 16;
int *el= (int*) malloc(n*sizeof(int) + ${NAMESPACE}_SIZEOF(n,n));
if (!el) { perror("malloc"); abort(); }
memset(el+n, 0, ${NAMESPACE}_SIZEOF(n,n));
if (data->el) {
memcpy(el, data->el, data->el_alloc * sizeof(int));
free(data->el);
}
data->el= el;
data->el_alloc= n;
for (i= 0, lim= data->el_count, data->el_count= 0; i < lim; i++)
if (userdata_insert(data, data->el[i]) < i) { printf("BUG: insert failed\n"); abort(); }
}
int userdata_delete(struct userdata *data, int value) {
size_t node_id, node_id2;
data->key= value;
node_id= ${namespace}_delete(data->el + data->el_alloc, data->el_alloc,
hash_function(value) % data->el_alloc, cmp_el, data);
if (node_id) {
// If it wasn't the final node, swap this node with the final one
// and swap the element to match.
if (node_id != data->el_count) {
${namespace}_path p;
${namespace}_path_init(&p);
data->key= data->el[node_id-1]= data->el[data->el_count-1];
node_id2= ${namespace}_find_path(data->el + data->el_alloc, data->el_alloc, &p,
hash_function(data->key) % data->el_alloc, cmp_el, data);
if (node_id2 != data->el_count)
return -1;
node_id2= ${namespace}_path_swap(data->el + data->el_alloc, data->el_alloc, &p, node_id);
if (node_id2 != data->el_count)
return -1;
}
data->el_count--;
}
return node_id - 1;
}
int main() {
struct userdata data= { NULL, 0, 0, 0 };
userdata_extend(&data);
int value, idx;
fputs("Demo on 16-element array of int.\n"
"Each integer is used as its own hash code.\n"
"Trigger collisions using multiples of the table size.\n",
stdout);
while (!feof(stdin)) {
fputs("\nEnter a number (negative to delete): ", stdout);
fflush(stdout);
if (!scanf("%d", &value)) return 0;
if (value < 0) {
idx= userdata_delete(&data, -value);
if (idx >= 0) {
rbhash_print(data.el + data.el_alloc, data.el_alloc, data.el_alloc, print_node, &data, stdout);
printf("Deleted el[%ld]\n", (long)idx);
}
else printf("Not found, or err\n");
}
else if (value > 0) {
if (data.el_count >= data.el_alloc) {
if (data.el_count >= ${NAMESPACE}_MAX_ELEMENTS_${max_bits})
printf("Array full\n");
else
userdata_extend(&data);
}
idx= userdata_insert(&data, value);
if (idx >= 0) {
rbhash_print(data.el + data.el_alloc, data.el_alloc, data.el_alloc, print_node, &data, stdout);
printf("inserted at el[%d]\n", idx);
}
else if (idx < 0)
printf("insert failed, tree corrupt?\n");
else
printf("already exists at el[%d]\n", idx);
}
}
fputs("\n", stdout);
}
## }