Skip to content

Commit 635d5ac

Browse files
author
Attila Bergou
committed
Fix structured arrays that contain objects #806
* Ensures that the fill value of structured arrays that contain objects is encoded using object_codec.
1 parent da88aa3 commit 635d5ac

3 files changed

Lines changed: 30 additions & 11 deletions

File tree

zarr/meta.py

Lines changed: 28 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,14 @@ def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]:
4040
# extract array metadata fields
4141
try:
4242
dtype = decode_dtype(meta['dtype'])
43-
fill_value = decode_fill_value(meta['fill_value'], dtype)
43+
44+
if dtype.hasobject:
45+
import numcodecs
46+
object_codec = numcodecs.get_codec(meta['filters'][0])
47+
else:
48+
object_codec = None
49+
50+
fill_value = decode_fill_value(meta['fill_value'], dtype, object_codec)
4451
meta = dict(
4552
zarr_format=meta['zarr_format'],
4653
shape=tuple(meta['shape']),
@@ -66,14 +73,18 @@ def encode_array_metadata(meta: MappingType[str, Any]) -> bytes:
6673
dtype, sdshape = dtype.subdtype
6774

6875
dimension_separator = meta.get('dimension_separator')
69-
76+
if dtype.hasobject:
77+
import numcodecs
78+
object_codec = numcodecs.get_codec(meta['filters'][0])
79+
else:
80+
object_codec = None
7081
meta = dict(
7182
zarr_format=ZARR_FORMAT,
7283
shape=meta['shape'] + sdshape,
7384
chunks=meta['chunks'],
7485
dtype=encode_dtype(dtype),
7586
compressor=meta['compressor'],
76-
fill_value=encode_fill_value(meta['fill_value'], dtype),
87+
fill_value=encode_fill_value(meta['fill_value'], dtype, object_codec),
7788
order=meta['order'],
7889
filters=meta['filters'],
7990
)
@@ -132,11 +143,16 @@ def encode_group_metadata(meta=None) -> bytes:
132143
}
133144

134145

135-
def decode_fill_value(v, dtype):
146+
def decode_fill_value(v, dtype, object_codec=None):
136147
# early out
137148
if v is None:
138149
return v
139-
if dtype.kind == 'f':
150+
if dtype.hasobject:
151+
v = base64.standard_b64decode(v)
152+
v = object_codec.decode(v)
153+
v = np.array(v, dtype=dtype)[()]
154+
return v
155+
elif dtype.kind == 'f':
140156
if v == 'NaN':
141157
return np.nan
142158
elif v == 'Infinity':
@@ -171,10 +187,14 @@ def decode_fill_value(v, dtype):
171187
return np.array(v, dtype=dtype)[()]
172188

173189

174-
def encode_fill_value(v: Any, dtype: np.dtype) -> Any:
190+
def encode_fill_value(v: Any, dtype: np.dtype, object_codec: Any = None) -> Any:
175191
# early out
176192
if v is None:
177193
return v
194+
if dtype.hasobject:
195+
v = object_codec.encode(v)
196+
v = str(base64.standard_b64encode(v), 'ascii')
197+
return v
178198
if dtype.kind == 'f':
179199
if np.isnan(v):
180200
return 'NaN'
@@ -190,8 +210,8 @@ def encode_fill_value(v: Any, dtype: np.dtype) -> Any:
190210
return bool(v)
191211
elif dtype.kind in 'c':
192212
c = cast(np.complex128, np.dtype(complex).type())
193-
v = (encode_fill_value(v.real, c.real.dtype),
194-
encode_fill_value(v.imag, c.imag.dtype))
213+
v = (encode_fill_value(v.real, c.real.dtype, object_codec),
214+
encode_fill_value(v.imag, c.imag.dtype, object_codec))
195215
return v
196216
elif dtype.kind in 'SV':
197217
v = str(base64.standard_b64encode(v), 'ascii')

zarr/storage.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -423,7 +423,7 @@ def _init_array_metadata(
423423
filters_config = []
424424

425425
# deal with object encoding
426-
if dtype == object:
426+
if dtype.hasobject:
427427
if object_codec is None:
428428
if not filters:
429429
# there are no filters so we can be sure there is no object codec

zarr/util.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -253,10 +253,9 @@ def normalize_dimension_separator(sep: Optional[str]) -> Optional[str]:
253253

254254
def normalize_fill_value(fill_value, dtype: np.dtype):
255255

256-
if fill_value is None:
256+
if fill_value is None or dtype.hasobject:
257257
# no fill value
258258
pass
259-
260259
elif fill_value == 0:
261260
# this should be compatible across numpy versions for any array type, including
262261
# structured arrays

0 commit comments

Comments
 (0)