@@ -148,7 +148,7 @@ fn to_treeboost_dataset(
148148fn benchmark_prediction_components ( c : & mut Criterion ) {
149149 let mut group = c. benchmark_group ( "PredictionProfile" ) ;
150150 group. measurement_time ( Duration :: from_secs ( 5 ) ) ;
151- group. sample_size ( 100 ) ;
151+ group. sample_size ( 20 ) ;
152152
153153 let num_features = 10 ;
154154 let num_rounds = 50 ;
@@ -157,7 +157,8 @@ fn benchmark_prediction_components(c: &mut Criterion) {
157157
158158 // Train model
159159 let ( train_features, train_targets) = generate_data ( train_rows, num_features, 42 ) ;
160- let train_data = to_treeboost_dataset ( & train_features, & train_targets, train_rows, num_features) ;
160+ let train_data =
161+ to_treeboost_dataset ( & train_features, & train_targets, train_rows, num_features) ;
161162 let config = GBDTConfig :: new ( )
162163 . with_num_rounds ( num_rounds)
163164 . with_max_depth ( 6 )
@@ -235,7 +236,7 @@ fn benchmark_histogram_building(c: &mut Criterion) {
235236
236237 let mut group = c. benchmark_group ( "HistogramProfile" ) ;
237238 group. measurement_time ( Duration :: from_secs ( 5 ) ) ;
238- group. sample_size ( 50 ) ;
239+ group. sample_size ( 20 ) ;
239240
240241 let num_rows = 100_000 ;
241242 let num_features = 10 ;
@@ -256,15 +257,11 @@ fn benchmark_histogram_building(c: &mut Criterion) {
256257 let builder = HistogramBuilder :: new ( ) ;
257258
258259 group. bench_function ( "histogram_contiguous_100k" , |b| {
259- b. iter ( || {
260- black_box ( builder. build ( & dataset, & contiguous_rows, & gradients, & hessians) )
261- } ) ;
260+ b. iter ( || black_box ( builder. build ( & dataset, & contiguous_rows, & gradients, & hessians) ) ) ;
262261 } ) ;
263262
264263 group. bench_function ( "histogram_sparse_50k" , |b| {
265- b. iter ( || {
266- black_box ( builder. build ( & dataset, & sparse_rows, & gradients, & hessians) )
267- } ) ;
264+ b. iter ( || black_box ( builder. build ( & dataset, & sparse_rows, & gradients, & hessians) ) ) ;
268265 } ) ;
269266
270267 // Single feature histogram - the innermost hot loop
@@ -361,10 +358,8 @@ fn benchmark_histogram_building(c: &mut Criterion) {
361358 for i in 0 ..remainder {
362359 let idx = * contiguous_rows. get_unchecked ( base + i) ;
363360 let bin = * feature_col. get_unchecked ( idx) as usize ;
364- bins. get_unchecked_mut ( bin) . accumulate (
365- * gradients. get_unchecked ( idx) ,
366- * hessians. get_unchecked ( idx) ,
367- ) ;
361+ bins. get_unchecked_mut ( bin)
362+ . accumulate ( * gradients. get_unchecked ( idx) , * hessians. get_unchecked ( idx) ) ;
368363 }
369364 }
370365 black_box ( hist)
@@ -373,49 +368,78 @@ fn benchmark_histogram_building(c: &mut Criterion) {
373368
374369 // Sparse data benchmarks (90% zeros - common in text/recommender systems)
375370 let ( sparse_features, sparse_targets) = generate_sparse_data ( num_rows, num_features, 0.9 , 42 ) ;
376- let sparse_dataset = to_treeboost_dataset ( & sparse_features, & sparse_targets, num_rows, num_features) ;
371+ let sparse_dataset =
372+ to_treeboost_dataset ( & sparse_features, & sparse_targets, num_rows, num_features) ;
377373 let sparse_gradients: Vec < f32 > = ( 0 ..num_rows) . map ( |i| ( i as f32 ) * 0.001 ) . collect ( ) ;
378374 let sparse_hessians: Vec < f32 > = vec ! [ 1.0 ; num_rows] ;
379375
380376 // Report sparsity detected
381377 let num_sparse = sparse_dataset. num_sparse_features ( ) ;
382- eprintln ! ( "Sparse dataset: {}/{} features detected as sparse" , num_sparse, num_features) ;
378+ eprintln ! (
379+ "Sparse dataset: {}/{} features detected as sparse" ,
380+ num_sparse, num_features
381+ ) ;
383382
384383 group. bench_function ( "histogram_sparse_data_contiguous_100k" , |b| {
385384 b. iter ( || {
386- black_box ( builder. build ( & sparse_dataset, & contiguous_rows, & sparse_gradients, & sparse_hessians) )
385+ black_box ( builder. build (
386+ & sparse_dataset,
387+ & contiguous_rows,
388+ & sparse_gradients,
389+ & sparse_hessians,
390+ ) )
387391 } ) ;
388392 } ) ;
389393
390394 group. bench_function ( "histogram_sparse_data_indexed_50k" , |b| {
391395 b. iter ( || {
392- black_box ( builder. build ( & sparse_dataset, & sparse_rows, & sparse_gradients, & sparse_hessians) )
396+ black_box ( builder. build (
397+ & sparse_dataset,
398+ & sparse_rows,
399+ & sparse_gradients,
400+ & sparse_hessians,
401+ ) )
393402 } ) ;
394403 } ) ;
395404
396405 // 95% sparse (even more extreme - text data)
397- let ( very_sparse_features, very_sparse_targets) = generate_sparse_data ( num_rows, num_features, 0.95 , 42 ) ;
398- let very_sparse_dataset = to_treeboost_dataset ( & very_sparse_features, & very_sparse_targets, num_rows, num_features) ;
406+ let ( very_sparse_features, very_sparse_targets) =
407+ generate_sparse_data ( num_rows, num_features, 0.95 , 42 ) ;
408+ let very_sparse_dataset = to_treeboost_dataset (
409+ & very_sparse_features,
410+ & very_sparse_targets,
411+ num_rows,
412+ num_features,
413+ ) ;
399414 let num_very_sparse = very_sparse_dataset. num_sparse_features ( ) ;
400- eprintln ! ( "Very sparse dataset: {}/{} features detected as sparse" , num_very_sparse, num_features) ;
415+ eprintln ! (
416+ "Very sparse dataset: {}/{} features detected as sparse" ,
417+ num_very_sparse, num_features
418+ ) ;
401419
402420 group. bench_function ( "histogram_very_sparse_contiguous_100k" , |b| {
403421 b. iter ( || {
404- black_box ( builder. build ( & very_sparse_dataset, & contiguous_rows, & sparse_gradients, & sparse_hessians) )
422+ black_box ( builder. build (
423+ & very_sparse_dataset,
424+ & contiguous_rows,
425+ & sparse_gradients,
426+ & sparse_hessians,
427+ ) )
405428 } ) ;
406429 } ) ;
407430
408431 // Benchmark a single tree grow (the main per-round cost)
409432 group. bench_function ( "single_tree_grow_100k" , |b| {
410- use treeboost:: tree:: TreeGrower ;
411433 use treeboost:: loss:: { LossFunction , MseLoss } ;
434+ use treeboost:: tree:: TreeGrower ;
412435
413436 let loss = MseLoss :: new ( ) ;
414437 let predictions = vec ! [ 0.0f32 ; num_rows] ;
415438 let targets_f32: Vec < f32 > = targets. iter ( ) . map ( |& t| t as f32 ) . collect ( ) ;
416439
417440 // Pre-compute gradients/hessians
418- let ( grads, hess) : ( Vec < f32 > , Vec < f32 > ) = targets_f32. iter ( )
441+ let ( grads, hess) : ( Vec < f32 > , Vec < f32 > ) = targets_f32
442+ . iter ( )
419443 . zip ( & predictions)
420444 . map ( |( & t, & p) | loss. gradient_hessian ( t, p) )
421445 . unzip ( ) ;
@@ -428,9 +452,7 @@ fn benchmark_histogram_building(c: &mut Criterion) {
428452
429453 let row_indices: Vec < usize > = ( 0 ..num_rows) . collect ( ) ;
430454
431- b. iter ( || {
432- black_box ( grower. grow_with_indices ( & dataset, & grads, & hess, & row_indices) )
433- } ) ;
455+ b. iter ( || black_box ( grower. grow_with_indices ( & dataset, & grads, & hess, & row_indices) ) ) ;
434456 } ) ;
435457
436458 group. finish ( ) ;
@@ -647,14 +669,15 @@ fn benchmark_efb(c: &mut Criterion) {
647669
648670 let mut group = c. benchmark_group ( "EFBProfile" ) ;
649671 group. measurement_time ( Duration :: from_secs ( 5 ) ) ;
650- group. sample_size ( 50 ) ;
672+ group. sample_size ( 20 ) ;
651673
652674 // Test case: 10 categories with 10 one-hot bins each = 100 features → should bundle to 10
653675 let num_rows = 100_000 ;
654676 let num_categories = 10 ;
655677 let bins_per_category = 10 ;
656678
657- let ( features, targets, feature_info) = generate_onehot_data ( num_rows, num_categories, bins_per_category, 42 ) ;
679+ let ( features, targets, feature_info) =
680+ generate_onehot_data ( num_rows, num_categories, bins_per_category, 42 ) ;
658681 let total_features = num_categories * bins_per_category;
659682
660683 let dataset = BinnedDataset :: new ( num_rows, features, targets, feature_info) ;
@@ -678,9 +701,7 @@ fn benchmark_efb(c: &mut Criterion) {
678701 ) ;
679702
680703 group. bench_function ( "bundled_dataset_creation" , |b| {
681- b. iter ( || {
682- black_box ( BundledDataset :: from_dataset ( & dataset, & bundling) )
683- } ) ;
704+ b. iter ( || black_box ( BundledDataset :: from_dataset ( & dataset, & bundling) ) ) ;
684705 } ) ;
685706
686707 let bundled = BundledDataset :: from_dataset ( & dataset, & bundling) ;
@@ -698,9 +719,7 @@ fn benchmark_efb(c: &mut Criterion) {
698719 let builder = HistogramBuilder :: new ( ) ;
699720
700721 group. bench_function ( "histogram_original_100_features" , |b| {
701- b. iter ( || {
702- black_box ( builder. build ( & dataset, & row_indices, & gradients, & hessians) )
703- } ) ;
722+ b. iter ( || black_box ( builder. build ( & dataset, & row_indices, & gradients, & hessians) ) ) ;
704723 } ) ;
705724
706725 // For bundled histogram, we build on each bundle column
@@ -744,22 +763,22 @@ fn benchmark_efb(c: &mut Criterion) {
744763 ) ;
745764
746765 group. bench_function ( "histogram_original_500_features" , |b| {
747- b. iter ( || {
748- black_box ( builder. build ( & dataset_medium, & row_indices, & gradients, & hessians) )
749- } ) ;
766+ b. iter ( || black_box ( builder. build ( & dataset_medium, & row_indices, & gradients, & hessians) ) ) ;
750767 } ) ;
751768
752769 group. finish ( ) ;
753770}
754771
755772fn benchmark_split_finding ( c : & mut Criterion ) {
756773 use treeboost:: histogram:: { Histogram , NodeHistograms } ;
774+ use treeboost:: kernel:: {
775+ find_best_split, find_best_split_scalar, find_best_split_simd, has_avx2,
776+ } ;
757777 use treeboost:: tree:: SplitFinder ;
758- use treeboost:: kernel:: { find_best_split_scalar, find_best_split_simd, find_best_split, has_avx2} ;
759778
760779 let mut group = c. benchmark_group ( "SplitFindingProfile" ) ;
761780 group. measurement_time ( Duration :: from_secs ( 5 ) ) ;
762- group. sample_size ( 100 ) ;
781+ group. sample_size ( 20 ) ;
763782
764783 // Report SIMD availability
765784 eprintln ! ( "AVX2 available: {}" , has_avx2( ) ) ;
@@ -809,9 +828,7 @@ fn benchmark_split_finding(c: &mut Criterion) {
809828 group. bench_function ( "kernel_scalar_single_feature" , |b| {
810829 b. iter ( || {
811830 black_box ( find_best_split_scalar (
812- & grads, & hess, & counts,
813- total_g, total_h, total_n,
814- 1.0 , 1 , 1.0 ,
831+ & grads, & hess, & counts, total_g, total_h, total_n, 1.0 , 1 , 1.0 ,
815832 ) )
816833 } ) ;
817834 } ) ;
@@ -823,9 +840,7 @@ fn benchmark_split_finding(c: &mut Criterion) {
823840 b. iter ( || {
824841 black_box ( unsafe {
825842 find_best_split_simd (
826- & grads, & hess, & counts,
827- total_g, total_h, total_n,
828- 1.0 , 1 , 1.0 ,
843+ & grads, & hess, & counts, total_g, total_h, total_n, 1.0 , 1 , 1.0 ,
829844 )
830845 } )
831846 } ) ;
@@ -836,9 +851,7 @@ fn benchmark_split_finding(c: &mut Criterion) {
836851 group. bench_function ( "kernel_dispatched_single_feature" , |b| {
837852 b. iter ( || {
838853 black_box ( find_best_split (
839- & grads, & hess, & counts,
840- total_g, total_h, total_n,
841- 1.0 , 1 , 1.0 ,
854+ & grads, & hess, & counts, total_g, total_h, total_n, 1.0 , 1 , 1.0 ,
842855 ) )
843856 } ) ;
844857 } ) ;
@@ -880,13 +893,23 @@ fn benchmark_split_finding(c: &mut Criterion) {
880893
881894 group. bench_function ( "splitfinder_20_features_no_entropy" , |b| {
882895 b. iter ( || {
883- black_box ( finder_no_entropy. find_best_split ( & multi_histograms, total_g, total_h, total_n) )
896+ black_box ( finder_no_entropy. find_best_split (
897+ & multi_histograms,
898+ total_g,
899+ total_h,
900+ total_n,
901+ ) )
884902 } ) ;
885903 } ) ;
886904
887905 group. bench_function ( "splitfinder_20_features_with_entropy" , |b| {
888906 b. iter ( || {
889- black_box ( finder_with_entropy. find_best_split ( & multi_histograms, total_g, total_h, total_n) )
907+ black_box ( finder_with_entropy. find_best_split (
908+ & multi_histograms,
909+ total_g,
910+ total_h,
911+ total_n,
912+ ) )
890913 } ) ;
891914 } ) ;
892915
@@ -899,15 +922,20 @@ fn benchmark_split_finding(c: &mut Criterion) {
899922
900923 group. bench_function ( "splitfinder_100_features_no_entropy" , |b| {
901924 b. iter ( || {
902- black_box ( finder_no_entropy. find_best_split ( & wide_histograms, total_g, total_h, total_n) )
925+ black_box ( finder_no_entropy. find_best_split (
926+ & wide_histograms,
927+ total_g,
928+ total_h,
929+ total_n,
930+ ) )
903931 } ) ;
904932 } ) ;
905933
906934 group. finish ( ) ;
907935}
908936
909937fn benchmark_4bit_unpacking ( c : & mut Criterion ) {
910- use treeboost:: dataset:: packed:: PackedColumn ;
938+ use treeboost:: dataset:: packed:: { PackedColumn , PackedDataset } ;
911939
912940 let mut group = c. benchmark_group ( "4bit_unpacking" ) ;
913941 group. measurement_time ( Duration :: from_secs ( 5 ) ) ;
@@ -956,7 +984,55 @@ fn benchmark_4bit_unpacking(c: &mut Criterion) {
956984 } ) ;
957985
958986 group. finish ( ) ;
987+
988+ // Benchmark to_binned() conversion (PackedDataset -> BinnedDataset)
989+ let mut group = c. benchmark_group ( "packed_to_binned" ) ;
990+ group. measurement_time ( Duration :: from_secs ( 5 ) ) ;
991+ group. sample_size ( 20 ) ;
992+
993+ for ( num_rows, num_features) in [ ( 10_000 , 20 ) , ( 100_000 , 20 ) , ( 100_000 , 100 ) ] {
994+ // Create a BinnedDataset with low-cardinality features (packable)
995+ let mut features = Vec :: with_capacity ( num_rows * num_features) ;
996+ for _ in 0 ..num_features {
997+ for r in 0 ..num_rows {
998+ features. push ( ( r % 16 ) as u8 ) ;
999+ }
1000+ }
1001+ let targets: Vec < f32 > = ( 0 ..num_rows) . map ( |i| i as f32 ) . collect ( ) ;
1002+ let feature_info: Vec < FeatureInfo > = ( 0 ..num_features)
1003+ . map ( |f| FeatureInfo {
1004+ name : format ! ( "f{}" , f) ,
1005+ feature_type : FeatureType :: Categorical ,
1006+ num_bins : 16 ,
1007+ bin_boundaries : vec ! [ ] ,
1008+ } )
1009+ . collect ( ) ;
1010+
1011+ let binned = BinnedDataset :: new ( num_rows, features, targets, feature_info) ;
1012+ let packed = PackedDataset :: from_binned ( & binned) ;
1013+
1014+ eprintln ! (
1015+ "PackedDataset {num_rows}x{num_features}: {:.1}% memory savings" ,
1016+ packed. memory_savings( ) * 100.0
1017+ ) ;
1018+
1019+ group. bench_function ( format ! ( "to_binned_{}x{}" , num_rows, num_features) , |b| {
1020+ b. iter ( || {
1021+ black_box ( packed. to_binned ( ) ) ;
1022+ } ) ;
1023+ } ) ;
1024+ }
1025+
1026+ group. finish ( ) ;
9591027}
9601028
961- criterion_group ! ( benches, benchmark_prediction_components, benchmark_histogram_building, benchmark_training_components, benchmark_efb, benchmark_split_finding, benchmark_4bit_unpacking) ;
1029+ criterion_group ! (
1030+ benches,
1031+ benchmark_prediction_components,
1032+ benchmark_histogram_building,
1033+ benchmark_training_components,
1034+ benchmark_efb,
1035+ benchmark_split_finding,
1036+ benchmark_4bit_unpacking
1037+ ) ;
9621038criterion_main ! ( benches) ;
0 commit comments