Commit 53a3f1e

ruff
1 parent 616bc45 commit 53a3f1e

File tree: 1 file changed (+47, −17 lines)


src/humanloop/eval_utils.py

Lines changed: 47 additions & 17 deletions
@@ -40,7 +40,9 @@
 from .types import ToolKernelRequest as Tool
 from .types import BooleanEvaluatorStatsResponse as BooleanStats
 from .types import NumericEvaluatorStatsResponse as NumericStats
-from .types import UpdateDatesetAction as UpdateDatasetAction # TODO: fix original type typo
+from .types import (
+    UpdateDatesetAction as UpdateDatasetAction,
+) # TODO: fix original type typo
 from .types import DatapointResponse as Datapoint
 from .types import (
     EvaluationStats,

@@ -60,7 +62,9 @@
 if not logger.hasHandlers():
     logger.addHandler(console_handler)

-EvaluatorDict = Union[CodeEvaluatorDict, LLMEvaluatorDict, HumanEvaluatorDict, ExternalEvaluator]
+EvaluatorDict = Union[
+    CodeEvaluatorDict, LLMEvaluatorDict, HumanEvaluatorDict, ExternalEvaluator
+]
 Version = Union[FlowDict, PromptDict, ToolDict, EvaluatorDict]
 FileType = Literal["flow", "prompt", "tool", "evaluator"]
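Note: `FileType` is a `Literal` alias, so a static checker rejects any string outside the four allowed values; the `Union` aliases play the same role for the version dicts. A minimal illustration of the pattern (the `describe` function is hypothetical, not part of the SDK):

    from typing import Literal

    FileType = Literal["flow", "prompt", "tool", "evaluator"]

    def describe(type_: FileType) -> str:
        return f"evaluating a {type_} file"

    describe("prompt")   # accepted
    describe("dataset")  # runs, but mypy/pyright flag it: not a valid FileType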

@@ -201,9 +205,13 @@ def _run_eval(
         function_ = file.pop("callable")
     except KeyError as _:
         if type_ == "flow":
-            raise ValueError("You must provide a `callable` for your Flow `file` to run a local eval.")
+            raise ValueError(
+                "You must provide a `callable` for your Flow `file` to run a local eval."
+            )
         else:
-            logger.info(f"No `callable` provided for your {type_} file - will attempt to generate logs on Humanloop.")
+            logger.info(
+                f"No `callable` provided for your {type_} file - will attempt to generate logs on Humanloop."
+            )

     custom_logger = file.pop("custom_logger", None)
     file_dict = {**file, **version}

@@ -221,15 +229,19 @@ def _run_eval(
         try:
             _ = Prompt.parse_obj(version)
         except ValidationError as error_:
-            logger.error(msg=f"Invalid Prompt `version` in your `file` request. \n\nValidation error: \n)")
+            logger.error(
+                msg=f"Invalid Prompt `version` in your `file` request. \n\nValidation error: \n)"
+            )
             raise error_
         hl_file = client.prompts.upsert(**file_dict)

     elif type_ == "tool":
         try:
             _ = Tool.parse_obj(version)
         except ValidationError as error_:
-            logger.error(msg=f"Invalid Tool `version` in your `file` request. \n\nValidation error: \n)")
+            logger.error(
+                msg=f"Invalid Tool `version` in your `file` request. \n\nValidation error: \n)"
+            )
             raise error_
         hl_file = client.tools.upsert(**file_dict)
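Note: `Prompt.parse_obj` / `Tool.parse_obj` follow the pydantic v1-style pattern of validating a plain dict against a model and raising `ValidationError` on mismatch, which is what the reflowed `except` blocks above catch. A minimal sketch of that pattern (`PromptModel` is an illustrative stand-in, not the SDK's actual `Prompt` type):

    from pydantic import BaseModel, ValidationError

    class PromptModel(BaseModel):  # stand-in for the SDK's request model
        model: str
        temperature: float = 1.0

    try:
        PromptModel.parse_obj({"temperature": "not-a-number"})  # missing `model`, bad float
    except ValidationError as error_:
        print(error_)  # names each failing field; the SDK logs and then re-raises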

@@ -262,7 +274,9 @@ def _run_eval(
             attributes={"code": inspect.getsource(eval_function)},
             evaluator_type="external",
         )
-        _ = client.evaluators.upsert(id=evaluator.get("id"), path=evaluator.get("path"), spec=spec)
+        _ = client.evaluators.upsert(
+            id=evaluator.get("id"), path=evaluator.get("path"), spec=spec
+        )

     # Validate upfront that the local Evaluators and Dataset fit
     requires_target = False

@@ -285,7 +299,6 @@ def _run_eval(
     try:
         evaluation = client.evaluations.create(
             name=name,
-            dataset={"file_id": hl_dataset.id},
             evaluators=[{"path": e["path"]} for e in evaluators],
             file={"id": hl_file.id},
         )

@@ -324,7 +337,9 @@ def process_datapoint(datapoint: Datapoint):
         datapoint_dict = datapoint.dict()
         try:
             if "messages" in datapoint_dict:
-                output = function_(**datapoint_dict["inputs"], messages=datapoint_dict["messages"])
+                output = function_(
+                    **datapoint_dict["inputs"], messages=datapoint_dict["messages"]
+                )
             else:
                 output = function_(**datapoint_dict["inputs"])
             if custom_logger:

@@ -349,7 +364,9 @@ def process_datapoint(datapoint: Datapoint):
                 start_time=start_time,
                 end_time=datetime.now(),
             )
-            logger.warning(msg=f"\nYour {type_}'s `callable` failed for Datapoint: {datapoint.id}. \n Error: {str(e)}")
+            logger.warning(
+                msg=f"\nYour {type_}'s `callable` failed for Datapoint: {datapoint.id}. \n Error: {str(e)}"
+            )

         # Apply local Evaluators
         for local_evaluator in local_evaluators:

@@ -382,7 +399,9 @@ def process_datapoint(datapoint: Datapoint):
                     start_time=start_time,
                     end_time=datetime.now(),
                 )
-                logger.warning(f"\nEvaluator {local_evaluator['path']} failed with error {str(e)}")
+                logger.warning(
+                    f"\nEvaluator {local_evaluator['path']} failed with error {str(e)}"
+                )

     # Execute the function and send the logs to Humanloop in parallel
     total_datapoints = len(hl_dataset.datapoints)

@@ -393,17 +412,22 @@ def process_datapoint(datapoint: Datapoint):
     # Generate locally if a file `callable` is provided
     if function_:
         logger.info(
-            f"{CYAN}\nRunning {hl_file.name} over the Dataset {hl_dataset.name} using {workers} workers{RESET} "
+            f"{CYAN}\nRunning '{hl_file.name}' over the Dataset '{hl_dataset.name}' using {workers} workers{RESET} "
         )
         completed_tasks = 0
         with ThreadPoolExecutor(max_workers=workers) as executor:
-            futures = [executor.submit(process_datapoint, datapoint) for datapoint in hl_dataset.datapoints]
+            futures = [
+                executor.submit(process_datapoint, datapoint)
+                for datapoint in hl_dataset.datapoints
+            ]
             for _ in as_completed(futures):
                 completed_tasks += 1
                 _progress_bar(total_datapoints, completed_tasks)
     else:
         # TODO: trigger run when updated API is available
-        logger.info(f"{CYAN}\nRunning {hl_file.name} over the Dataset {hl_dataset.name}{RESET}")
+        logger.info(
+            f"{CYAN}\nRunning '{hl_file.name}' over the Dataset '{hl_dataset.name}'{RESET}"
+        )

     # Wait for the Evaluation to complete then print the results
     complete = False
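Note: the reflowed block above is the standard submit-then-`as_completed` idiom: futures are collected eagerly, then consumed in completion order, so the progress bar advances as each datapoint finishes rather than in submission order. A self-contained sketch of the same idiom (`work` and the integer datapoints are placeholders for `process_datapoint` and `hl_dataset.datapoints`):

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def work(datapoint: int) -> int:  # placeholder for process_datapoint
        return datapoint * 2

    completed = 0
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(work, dp) for dp in range(10)]
        for _ in as_completed(futures):  # yields each future as it finishes
            completed += 1
            print(f"\r{completed}/{len(futures)}", end="")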
@@ -486,7 +510,9 @@ def _get_log_func(
         raise NotImplementedError(f"Unsupported File version: {type_}")


-def get_score_from_evaluator_stat(stat: Union[NumericStats, BooleanStats]) -> Union[float, None]:
+def get_score_from_evaluator_stat(
+    stat: Union[NumericStats, BooleanStats],
+) -> Union[float, None]:
     """Get the score from an Evaluator Stat."""
     score = None
     if isinstance(stat, BooleanStats):
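Note: the body of `get_score_from_evaluator_stat` (partially shown above) dispatches on the runtime type of the stat. A hedged sketch of that shape, with simplified dataclass stand-ins for the SDK's `BooleanStats`/`NumericStats` response models (their real field names may differ):

    from dataclasses import dataclass
    from typing import Union

    @dataclass
    class BooleanStats:  # stand-in: assumes counts of passing logs
        num_true: int
        total_logs: int

    @dataclass
    class NumericStats:  # stand-in: assumes a pre-computed mean
        mean: float

    def get_score(stat: Union[NumericStats, BooleanStats]) -> Union[float, None]:
        if isinstance(stat, BooleanStats):
            return round(stat.num_true / stat.total_logs, 2)
        if isinstance(stat, NumericStats):
            return stat.mean
        return None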
@@ -613,10 +639,14 @@ def check_evaluation_improvement(
             previous_score = get_score_from_evaluator_stat(stat=previous_evaluator_stat)
             diff = round(latest_score - previous_score, 2)
             if diff >= 0:
-                logger.info(f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}")
+                logger.info(
+                    f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}"
+                )
                 return True, latest_score, diff
             else:
-                logger.info(f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}")
+                logger.info(
+                    f"{CYAN}Change of [{diff}] for Evaluator {evaluator_path}{RESET}"
+                )
                 return False, latest_score, diff
     else:
         raise ValueError(f"Evaluator {evaluator_path} not found in the stats.")
