Skip to content

Commit d5e19d1

Browse files
committed
Allow not providing datapoints to a dataset and remove custom logger.
1 parent 336e23f commit d5e19d1

File tree

1 file changed

+37
-49
lines changed

1 file changed

+37
-49
lines changed

src/humanloop/eval_utils.py

Lines changed: 37 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from typing_extensions import NotRequired, TypedDict
1919
import time
2020
import sys
21+
import json
2122
from concurrent.futures import ThreadPoolExecutor, as_completed
2223

2324
from .client import BaseHumanloop
@@ -34,7 +35,7 @@
3435
from .requests import HumanEvaluatorRequestParams as HumanEvaluatorDict
3536

3637

37-
# Responses are Pydantic models and we leverage them for improved request validation
38+
# Responses are Pydantic models, we leverage them for improved request validation
3839
from .types import FlowKernelRequest as Flow
3940
from .types import PromptKernelRequest as Prompt
4041
from .types import ToolKernelRequest as Tool
@@ -97,22 +98,12 @@ class File(Identifiers):
9798
"""The function being evaluated.
9899
It will be called using your Dataset `inputs` as follows: `output = callable(**datapoint.inputs)`.
99100
If `messages` are defined in your Dataset, then `output = callable(**datapoint.inputs, messages=datapoint.messages)`.
100-
It should return a single string output. If not, you must provide a `custom_logger`.
101-
"""
102-
custom_logger: NotRequired[Callable]
103-
"""function that logs the output of your function to Humanloop, replacing the default logging.
104-
If provided, it will be called as follows:
105-
```
106-
output = callable(**datapoint.inputs).
107-
log = custom_logger(client, output)
108-
```
109-
Inside the custom_logger, you can use the Humanloop `client` to log the output of your function.
110-
If not provided your pipeline must return a single string.
101+
It should return a single string or json output.
111102
"""
112103

113104

114105
class Dataset(Identifiers):
115-
datapoints: Sequence[DatapointDict]
106+
datapoints: NotRequired[Sequence[DatapointDict]]
116107
"""The datapoints to map your function over to produce the outputs required by the evaluation."""
117108
action: NotRequired[UpdateDatasetAction]
118109
"""How to update the Dataset given the provided Datapoints;
@@ -128,15 +119,6 @@ class Evaluator(Identifiers):
128119
"""The type of return value the Evaluator produces - only required for local Evaluators."""
129120
callable: NotRequired[Callable]
130121
"""The function to run on the logs to produce the judgment - only required for local Evaluators."""
131-
custom_logger: NotRequired[Callable]
132-
"""Optional function that logs the output judgment from your Evaluator to Humanloop; if provided, it will be called as follows:
133-
```
134-
judgment = callable(log_dict)
135-
log = custom_logger(client, judgment)
136-
```
137-
Inside the custom_logger, you can use the Humanloop `client` to log the judgment to Humanloop.
138-
If not provided your function must return a single string and by default the code will be used to inform the version of the external Evaluator on Humanloop.
139-
"""
140122
threshold: NotRequired[float]
141123
"""The threshold to check the Evaluator against. If the aggregate value of the Evaluator is below this threshold, the check will fail."""
142124

@@ -213,7 +195,6 @@ def _run_eval(
213195
f"No `callable` provided for your {type_} file - will attempt to generate logs on Humanloop."
214196
)
215197

216-
custom_logger = file.pop("custom_logger", None)
217198
file_dict = {**file, **version}
218199

219200
if type_ == "flow":
@@ -252,8 +233,17 @@ def _run_eval(
252233
raise NotImplementedError(f"Unsupported File type: {type_}")
253234

254235
# Upsert the Dataset
255-
hl_dataset = client.datasets.upsert(**dataset)
256-
hl_dataset = client.datasets.get(id=hl_dataset.id, include_datapoints=True)
236+
action = dataset.get("action", "set") # set is the server default - None not allowed.
237+
if "datapoints" not in dataset:
238+
dataset["datapoints"] = []
239+
# Use `upsert` to get existing dataset ID if no datapoints provided, given we can't `get` on path.
240+
action = "add"
241+
hl_dataset = client.datasets.upsert(**dataset, action=action)
242+
hl_dataset = client.datasets.get(
243+
id=hl_dataset.id,
244+
version_id=hl_dataset.version_id,
245+
include_datapoints=True
246+
)
257247

258248
# Upsert the local Evaluators; other Evaluators are just referenced by `path` or `id`
259249
local_evaluators: List[Evaluator] = []
@@ -341,20 +331,22 @@ def process_datapoint(datapoint: Datapoint):
341331
)
342332
else:
343333
output = function_(**datapoint_dict["inputs"])
344-
if custom_logger:
345-
log = function_(client=client, output=output)
346-
else:
347-
if not isinstance(output, str):
334+
335+
if not isinstance(output, str):
336+
try:
337+
output = json.dumps(output)
338+
# throw error if it fails to serialize
339+
except Exception as _:
348340
raise ValueError(
349-
f"Your {type_}'s `callable` must return a string if you do not provide a custom logger."
341+
f"Your {type_}'s `callable` must return a string or a JSON serializable object."
350342
)
351-
log = log_func(
352-
inputs=datapoint.inputs,
353-
output=output,
354-
source_datapoint_id=datapoint.id,
355-
start_time=start_time,
356-
end_time=datetime.now(),
357-
)
343+
log = log_func(
344+
inputs=datapoint.inputs,
345+
output=output,
346+
source_datapoint_id=datapoint.id,
347+
start_time=start_time,
348+
end_time=datetime.now(),
349+
)
358350
except Exception as e:
359351
log = log_func(
360352
inputs=datapoint.inputs,
@@ -377,18 +369,14 @@ def process_datapoint(datapoint: Datapoint):
377369
else:
378370
judgment = eval_function(log.dict())
379371

380-
if local_evaluator.get("custom_logger", None):
381-
local_evaluator["custom_logger"](client=client, judgment=judgment)
382-
else:
383-
# The API call will validate the judgment
384-
_ = client.evaluators.log(
385-
parent_id=log.id,
386-
id=local_evaluator.get("id"),
387-
path=local_evaluator.get("path"),
388-
judgment=judgment,
389-
start_time=start_time,
390-
end_time=datetime.now(),
391-
)
372+
_ = client.evaluators.log(
373+
parent_id=log.id,
374+
id=local_evaluator.get("id"),
375+
path=local_evaluator.get("path"),
376+
judgment=judgment,
377+
start_time=start_time,
378+
end_time=datetime.now(),
379+
)
392380
except Exception as e:
393381
_ = client.evaluators.log(
394382
parent_id=log.id,

0 commit comments

Comments
 (0)