@@ -40,7 +40,9 @@
 from .types import ToolKernelRequest as Tool
 from .types import BooleanEvaluatorStatsResponse as BooleanStats
 from .types import NumericEvaluatorStatsResponse as NumericStats
-from .types import UpdateDatesetAction as UpdateDatasetAction  # TODO: fix original type typo
+from .types import (
+    UpdateDatesetAction as UpdateDatasetAction,
+)  # TODO: fix original type typo
 from .types import DatapointResponse as Datapoint
 from .types import (
     EvaluationStats,
@@ -60,7 +62,9 @@
 if not logger.hasHandlers():
     logger.addHandler(console_handler)
 
-EvaluatorDict = Union[CodeEvaluatorDict, LLMEvaluatorDict, HumanEvaluatorDict, ExternalEvaluator]
+EvaluatorDict = Union[
+    CodeEvaluatorDict, LLMEvaluatorDict, HumanEvaluatorDict, ExternalEvaluator
+]
 Version = Union[FlowDict, PromptDict, ToolDict, EvaluatorDict]
 FileType = Literal["flow", "prompt", "tool", "evaluator"]
 
@@ -201,9 +205,13 @@ def _run_eval(
         function_ = file.pop("callable")
     except KeyError as _:
         if type_ == "flow":
-            raise ValueError("You must provide a `callable` for your Flow `file` to run a local eval.")
+            raise ValueError(
+                "You must provide a `callable` for your Flow `file` to run a local eval."
+            )
         else:
-            logger.info(f"No `callable` provided for your {type_} file - will attempt to generate logs on Humanloop.")
+            logger.info(
+                f"No `callable` provided for your {type_} file - will attempt to generate logs on Humanloop."
+            )
 
     custom_logger = file.pop("custom_logger", None)
     file_dict = {**file, **version}
@@ -221,15 +229,19 @@ def _run_eval(
         try:
             _ = Prompt.parse_obj(version)
         except ValidationError as error_:
-            logger.error(msg=f"Invalid Prompt `version` in your `file` request.\n\nValidation error:\n)")
+            logger.error(
+                msg=f"Invalid Prompt `version` in your `file` request.\n\nValidation error:\n)"
+            )
             raise error_
         hl_file = client.prompts.upsert(**file_dict)
 
     elif type_ == "tool":
         try:
             _ = Tool.parse_obj(version)
         except ValidationError as error_:
-            logger.error(msg=f"Invalid Tool `version` in your `file` request.\n\nValidation error:\n)")
+            logger.error(
+                msg=f"Invalid Tool `version` in your `file` request.\n\nValidation error:\n)"
+            )
             raise error_
         hl_file = client.tools.upsert(**file_dict)
 
@@ -262,7 +274,9 @@ def _run_eval(
             attributes={"code": inspect.getsource(eval_function)},
             evaluator_type="external",
         )
-        _ = client.evaluators.upsert(id=evaluator.get("id"), path=evaluator.get("path"), spec=spec)
+        _ = client.evaluators.upsert(
+            id=evaluator.get("id"), path=evaluator.get("path"), spec=spec
+        )
 
     # Validate upfront that the local Evaluators and Dataset fit
     requires_target = False
@@ -285,7 +299,6 @@ def _run_eval(
     try:
         evaluation = client.evaluations.create(
             name=name,
-            dataset={"file_id": hl_dataset.id},
             evaluators=[{"path": e["path"]} for e in evaluators],
             file={"id": hl_file.id},
         )
@@ -324,7 +337,9 @@ def process_datapoint(datapoint: Datapoint):
         datapoint_dict = datapoint.dict()
         try:
             if "messages" in datapoint_dict:
-                output = function_(**datapoint_dict["inputs"], messages=datapoint_dict["messages"])
+                output = function_(
+                    **datapoint_dict["inputs"], messages=datapoint_dict["messages"]
+                )
             else:
                 output = function_(**datapoint_dict["inputs"])
             if custom_logger:
@@ -349,7 +364,9 @@ def process_datapoint(datapoint: Datapoint):
                 start_time=start_time,
                 end_time=datetime.now(),
             )
-            logger.warning(msg=f"\nYour {type_}'s `callable` failed for Datapoint: {datapoint.id}.\nError: {str(e)}")
+            logger.warning(
+                msg=f"\nYour {type_}'s `callable` failed for Datapoint: {datapoint.id}.\nError: {str(e)}"
+            )
 
         # Apply local Evaluators
         for local_evaluator in local_evaluators:
@@ -382,7 +399,9 @@ def process_datapoint(datapoint: Datapoint):
                     start_time=start_time,
                     end_time=datetime.now(),
                 )
-                logger.warning(f"\nEvaluator {local_evaluator['path']} failed with error {str(e)}")
+                logger.warning(
+                    f"\nEvaluator {local_evaluator['path']} failed with error {str(e)}"
+                )
 
     # Execute the function and send the logs to Humanloop in parallel
     total_datapoints = len(hl_dataset.datapoints)
@@ -393,17 +412,22 @@ def process_datapoint(datapoint: Datapoint):
     # Generate locally if a file `callable` is provided
     if function_:
         logger.info(
-            f"{CYAN}\nRunning {hl_file.name} over the Dataset {hl_dataset.name} using {workers} workers{RESET}"
+            f"{CYAN}\nRunning '{hl_file.name}' over the Dataset '{hl_dataset.name}' using {workers} workers{RESET}"
         )
         completed_tasks = 0
         with ThreadPoolExecutor(max_workers=workers) as executor:
-            futures = [executor.submit(process_datapoint, datapoint) for datapoint in hl_dataset.datapoints]
+            futures = [
+                executor.submit(process_datapoint, datapoint)
+                for datapoint in hl_dataset.datapoints
+            ]
             for _ in as_completed(futures):
                 completed_tasks += 1
                 _progress_bar(total_datapoints, completed_tasks)
     else:
         # TODO: trigger run when updated API is available
-        logger.info(f"{CYAN}\nRunning {hl_file.name} over the Dataset {hl_dataset.name}{RESET}")
+        logger.info(
+            f"{CYAN}\nRunning '{hl_file.name}' over the Dataset '{hl_dataset.name}'{RESET}"
+        )
 
     # Wait for the Evaluation to complete then print the results
     complete = False
@@ -486,7 +510,9 @@ def _get_log_func(
     raise NotImplementedError(f"Unsupported File version: {type_}")
 
 
-def get_score_from_evaluator_stat(stat: Union[NumericStats, BooleanStats]) -> Union[float, None]:
+def get_score_from_evaluator_stat(
+    stat: Union[NumericStats, BooleanStats],
+) -> Union[float, None]:
     """Get the score from an Evaluator Stat."""
     score = None
     if isinstance(stat, BooleanStats):
@@ -613,10 +639,14 @@ def check_evaluation_improvement(
         previous_score = get_score_from_evaluator_stat(stat=previous_evaluator_stat)
         diff = round(latest_score - previous_score, 2)
         if diff >= 0:
-            logger.info(f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}")
+            logger.info(
+                f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}"
+            )
             return True, latest_score, diff
         else:
-            logger.info(f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}")
+            logger.info(
+                f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}"
+            )
             return False, latest_score, diff
     else:
         raise ValueError(f"Evaluator {evaluator_path} not found in the stats.")
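
As context for the reshaped `get_score_from_evaluator_stat` signature and the rounded score diff that `check_evaluation_improvement` logs above, here is a minimal, self-contained sketch of the scoring idea. The stat attribute names used here (`num_true`, `total_logs`, `mean`) are assumptions for illustration only; this diff does not show the real fields of the SDK's stats models.

from dataclasses import dataclass
from typing import Union


# Hypothetical stand-ins for the SDK's BooleanStats / NumericStats
# response models; the attribute names are assumed, not confirmed.
@dataclass
class BooleanStats:
    num_true: int
    total_logs: int


@dataclass
class NumericStats:
    mean: float


def get_score(stat: Union[NumericStats, BooleanStats]) -> Union[float, None]:
    """Collapse an Evaluator's stats into a single comparable score."""
    if isinstance(stat, BooleanStats):
        # A boolean Evaluator reduces to the fraction of passing Logs.
        return round(stat.num_true / stat.total_logs, 2)
    if isinstance(stat, NumericStats):
        # A numeric Evaluator already aggregates to a mean.
        return round(stat.mean, 2)
    return None


# The same rounded difference that check_evaluation_improvement reports.
latest = get_score(BooleanStats(num_true=9, total_logs=10))    # 0.9
previous = get_score(BooleanStats(num_true=8, total_logs=10))  # 0.8
print(round(latest - previous, 2))  # 0.1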