2727
2828from hed import HedString , QueryHandler # noqa: E402
2929from hed .models .basic_search import find_matching # noqa: E402
30- from hed .models .string_search import StringQueryHandler , search_series # noqa: E402
30+ from hed .models .string_search import StringQueryHandler , string_search # noqa: E402
3131
3232from data_generator import DataGenerator # noqa: E402
3333
@@ -143,7 +143,7 @@ def _bench_basic(self, raw, query, cfg_label, q_label):
143143 med , _ = time_it (lambda : find_matching (series , query ), self .n_runs )
144144 matches = int (find_matching (series , query ).sum ())
145145 return {
146- "engine" : "basic_search " ,
146+ "engine" : "Basic search " ,
147147 "query_label" : q_label ,
148148 "config_label" : cfg_label ,
149149 "query" : query ,
@@ -166,7 +166,7 @@ def do_search():
166166 search_med , _ = time_it (do_search , self .n_runs )
167167 result = do_search ()
168168 return {
169- "engine" : "QueryHandler " ,
169+ "engine" : "Object search " ,
170170 "query_label" : q_label ,
171171 "config_label" : cfg_label ,
172172 "query" : query ,
@@ -181,8 +181,9 @@ def _bench_string_qh(self, raw, query, cfg_label, q_label, schema_lookup, suffix
181181 sqh = StringQueryHandler (query )
182182 search_med , _ = time_it (lambda : sqh .search (raw , schema_lookup = schema_lookup ), self .n_runs )
183183 result = sqh .search (raw , schema_lookup = schema_lookup )
184+ label = "String search" if suffix == "no_lookup" else "String search (lookup)"
184185 return {
185- "engine" : f"StringQueryHandler_ { suffix } " ,
186+ "engine" : label ,
186187 "query_label" : q_label ,
187188 "config_label" : cfg_label ,
188189 "query" : query ,
@@ -235,15 +236,15 @@ def run_all(self, series_configs):
235236 rec = self ._bench_basic_series (series , bs_query , label , q_label , n_rows )
236237 records .append (rec )
237238
238- # --- search_series (StringQueryHandler) no lookup ---
239+ # --- String search (StringQueryHandler) no lookup ---
239240 rec = self ._bench_search_series (series , qh_query , label , q_label , n_rows , None , "no_lookup" )
240241 records .append (rec )
241242
242- # --- search_series (StringQueryHandler) with lookup ---
243+ # --- String search (StringQueryHandler) with lookup ---
243244 rec = self ._bench_search_series (series , qh_query , label , q_label , n_rows , self .lookup , "with_lookup" )
244245 records .append (rec )
245246
246- # --- QueryHandler loop ---
247+ # --- Object search ( QueryHandler loop) ---
247248 rec = self ._bench_qh_loop (series , qh_query , label , q_label , n_rows )
248249 records .append (rec )
249250
@@ -253,7 +254,7 @@ def _bench_basic_series(self, series, query, cfg_label, q_label, n_rows):
253254 med , _ = time_it (lambda : find_matching (series , query ), self .n_runs )
254255 matches = int (find_matching (series , query ).sum ())
255256 return {
256- "engine" : "basic_search " ,
257+ "engine" : "Basic search " ,
257258 "query_label" : q_label ,
258259 "config_label" : cfg_label ,
259260 "n_rows" : n_rows ,
@@ -263,10 +264,12 @@ def _bench_basic_series(self, series, query, cfg_label, q_label, n_rows):
263264 }
264265
265266 def _bench_search_series (self , series , query , cfg_label , q_label , n_rows , lookup , suffix ):
266- med , _ = time_it (lambda : search_series (series , query , schema_lookup = lookup ), self .n_runs )
267- matches = int (search_series (series , query , schema_lookup = lookup ).sum ())
267+ strings = series .tolist ()
268+ med , _ = time_it (lambda : string_search (strings , query , schema_lookup = lookup ), self .n_runs )
269+ matches = sum (string_search (strings , query , schema_lookup = lookup ))
270+ label = "String search" if suffix == "no_lookup" else "String search (lookup)"
268271 return {
269- "engine" : f"search_series_ { suffix } " ,
272+ "engine" : label ,
270273 "query_label" : q_label ,
271274 "config_label" : cfg_label ,
272275 "n_rows" : n_rows ,
@@ -294,7 +297,7 @@ def do_all():
294297 if qh .search (hs ):
295298 count += 1
296299 return {
297- "engine" : "QueryHandler_loop " ,
300+ "engine" : "Object search " ,
298301 "query_label" : q_label ,
299302 "config_label" : cfg_label ,
300303 "n_rows" : n_rows ,
@@ -402,16 +405,35 @@ def sweep_query_complexity(self):
402405 return records
403406
404407 def sweep_schema_lookup (self ):
405- """Compare StringQueryHandler with vs without schema_lookup."""
406- raw = self .gen .make_string (n_tags = 15 , n_groups = 3 , depth = 1 )
407- query = "Event"
408- sqh = StringQueryHandler (query )
408+ """Compare StringQueryHandler with vs without schema_lookup across query types.
409+
410+ Uses a fixed short-form string containing known descendants of Event and Action so
411+ the behavioural difference (which strings match) is deterministic.
412+ """
413+ # Fixed short-form string with known Event and Action descendants.
414+ # Sensory-event, Agent-action, Data-feature are Event descendants;
415+ # Communicate, Clap-hands are Action descendants.
416+ raw = (
417+ "Sensory-event, Agent-action, Data-feature, Communicate, Clap-hands, "
418+ "Communicate-gesturally, Blue, High, (Red, Move), (Experiment-control, Frown)"
419+ )
420+ queries = [
421+ ("Ancestor: Event" , "Event" ),
422+ ("Ancestor: Action" , "Action" ),
423+ ("Exact: Sensory-event" , "Sensory-event" ),
424+ ("Compound: Event && Action" , "Event && Action" ),
425+ ]
409426 records = []
410- for with_lookup in [False , True ]:
411- lk = self .lookup if with_lookup else None
412- label = "with_lookup" if with_lookup else "no_lookup"
413- med , _ = time_it (lambda lk = lk : sqh .search (raw , schema_lookup = lk ), self .n_runs )
414- records .append ({"factor" : "schema_lookup" , "level" : label , "engine" : "StringQueryHandler" , "time" : med })
427+ for q_label , query in queries :
428+ sqh = StringQueryHandler (query )
429+ for with_lookup in [False , True ]:
430+ lk = self .lookup if with_lookup else None
431+ mode = "With lookup" if with_lookup else "No lookup"
432+ med , _ = time_it (lambda lk = lk , _sqh = sqh : _sqh .search (raw , schema_lookup = lk ), self .n_runs )
433+ matches = len (sqh .search (raw , schema_lookup = lk ))
434+ records .append (
435+ {"factor" : "schema_lookup" , "level" : q_label , "engine" : mode , "time" : med , "matches" : matches }
436+ )
415437 return records
416438
417439 def sweep_string_form (self ):
@@ -440,18 +462,18 @@ def qh_search():
440462 qh .search (hs )
441463
442464 search_med , _ = time_it (qh_search , self .n_runs )
443- records .append ({"factor" : "compile_vs_search" , "level" : "compile" , "engine" : "QueryHandler" , "time" : comp })
444- records .append ({"factor" : "compile_vs_search" , "level" : "search" , "engine" : "QueryHandler" , "time" : search_med })
465+ records .append ({"factor" : "compile_vs_search" , "level" : "compile" , "engine" : "Object search" , "time" : comp })
466+ records .append (
467+ {"factor" : "compile_vs_search" , "level" : "search" , "engine" : "Object search" , "time" : search_med }
468+ )
445469
446470 # StringQueryHandler
447471 comp2 , _ = time_it (lambda : StringQueryHandler (query ), self .n_runs )
448472 sqh = StringQueryHandler (query )
449473 search_med2 , _ = time_it (lambda : sqh .search (raw , schema_lookup = self .lookup ), self .n_runs )
474+ records .append ({"factor" : "compile_vs_search" , "level" : "compile" , "engine" : "String search" , "time" : comp2 })
450475 records .append (
451- {"factor" : "compile_vs_search" , "level" : "compile" , "engine" : "StringQueryHandler" , "time" : comp2 }
452- )
453- records .append (
454- {"factor" : "compile_vs_search" , "level" : "search" , "engine" : "StringQueryHandler" , "time" : search_med2 }
476+ {"factor" : "compile_vs_search" , "level" : "search" , "engine" : "String search" , "time" : search_med2 }
455477 )
456478
457479 return records
@@ -534,7 +556,7 @@ def _bench_all_engines(self, raw, qh_query, bs_query=None):
534556 # basic_search
535557 if bs_query is not None :
536558 med , _ = time_it (lambda : find_matching (series1 , bs_query ), self .n_runs )
537- yield "basic_search " , med
559+ yield "Basic search " , med
538560
539561 # QueryHandler
540562 qh = QueryHandler (qh_query )
@@ -544,31 +566,32 @@ def qh_search():
544566 qh .search (hs )
545567
546568 med , _ = time_it (qh_search , self .n_runs )
547- yield "QueryHandler " , med
569+ yield "Object search " , med
548570
549571 # StringQueryHandler no lookup
550572 sqh = StringQueryHandler (qh_query )
551573 med , _ = time_it (lambda : sqh .search (raw , schema_lookup = None ), self .n_runs )
552- yield "SQH_no_lookup " , med
574+ yield "String search " , med
553575
554576 # StringQueryHandler with lookup
555577 med , _ = time_it (lambda : sqh .search (raw , schema_lookup = self .lookup ), self .n_runs )
556- yield "SQH_with_lookup " , med
578+ yield "String search (lookup) " , med
557579
558580 def _bench_series_engines (self , series , qh_query , bs_query , n_rows ):
559581 """Yield (engine_name, median_time) for series-level engines."""
560582 # basic_search
561583 if bs_query is not None :
562- med , _ = time_it (lambda : find_matching (series , bs_query ), max ( 3 , self .n_runs // 2 ) )
563- yield "basic_search " , med
584+ med , _ = time_it (lambda : find_matching (series , bs_query ), self .n_runs )
585+ yield "Basic search " , med
564586
565- # search_series no lookup
566- med , _ = time_it (lambda : search_series (series , qh_query , schema_lookup = None ), max (3 , self .n_runs // 2 ))
567- yield "search_series_no_lookup" , med
587+ # String search no lookup
588+ strings = series .tolist ()
589+ med , _ = time_it (lambda : string_search (strings , qh_query , schema_lookup = None ), self .n_runs )
590+ yield "String search" , med
568591
569- # search_series with lookup
570- med , _ = time_it (lambda : search_series ( series , qh_query , schema_lookup = self .lookup ), max ( 3 , self .n_runs // 2 ) )
571- yield "search_series_with_lookup " , med
592+ # String search with lookup
593+ med , _ = time_it (lambda : string_search ( strings , qh_query , schema_lookup = self .lookup ), self .n_runs )
594+ yield "String search (lookup) " , med
572595
573596 # QueryHandler loop
574597 qh = QueryHandler (qh_query )
@@ -580,8 +603,8 @@ def qh_loop():
580603 hs = HedString (s , schema )
581604 qh .search (hs )
582605
583- med , _ = time_it (qh_loop , max ( 3 , self .n_runs // 2 ) )
584- yield "QueryHandler_loop " , med
606+ med , _ = time_it (qh_loop , self .n_runs )
607+ yield "Object search " , med
585608
586609
587610# ======================================================================
@@ -595,7 +618,7 @@ def run_full_benchmark(quick=False):
595618 gen = DataGenerator ()
596619
597620 n_single = 10 if quick else 20
598- n_series = 3 if quick else 5
621+ n_series = 3 if quick else 10
599622 n_sweep = 5 if quick else 10
600623
601624 # ------------------------------------------------------------------
@@ -675,20 +698,22 @@ def run_full_benchmark(quick=False):
675698 med , _ = time_it (lambda bs_query = bs_query : find_matching (real_series , bs_query ), n_series )
676699 real_results .append (
677700 {
678- "engine" : "basic_search " ,
701+ "engine" : "Basic search " ,
679702 "query_label" : q_label ,
680703 "total_time" : med ,
681704 "per_row" : med / real_n ,
682705 "n_rows" : real_n ,
683706 }
684707 )
685708
709+ real_strings = real_series .tolist ()
686710 med , _ = time_it (
687- lambda qh_query = qh_query : search_series (real_series , qh_query , schema_lookup = gen .lookup ), n_series
711+ lambda qh_query = qh_query , _rs = real_strings : string_search (_rs , qh_query , schema_lookup = gen .lookup ),
712+ n_series ,
688713 )
689714 real_results .append (
690715 {
691- "engine" : "search_series " ,
716+ "engine" : "String search " ,
692717 "query_label" : q_label ,
693718 "total_time" : med ,
694719 "per_row" : med / real_n ,
@@ -708,7 +733,7 @@ def qh_loop(qh=qh, schema=schema):
708733 med , _ = time_it (qh_loop , n_series )
709734 real_results .append (
710735 {
711- "engine" : "QueryHandler_loop " ,
736+ "engine" : "Object search " ,
712737 "query_label" : q_label ,
713738 "total_time" : med ,
714739 "per_row" : med / real_n ,
0 commit comments