Skip to content

Commit c6df908

Browse files
marcenacp authored and The TensorFlow Datasets Authors committed
Allow to deserialize an example even if a key exists that wasn't found in the specs.
This matches tf.data's path:

```python
import numpy as np
import tensorflow_datasets as tfds

required_feature = tfds.features.FeaturesDict({
    'required': tfds.features.Scalar(dtype=np.str_)
})
optional_feature = tfds.features.FeaturesDict({
    'optional': tfds.features.Scalar(dtype=np.str_, optional=True)
})
serialized_element = required_feature.serialize_example({'required': 'hello world'})
optional_feature.deserialize_example(serialized_element)
```

PiperOrigin-RevId: 690987105
1 parent fc0c421 commit c6df908

File tree

2 files changed

+8
-13
lines changed

2 files changed

+8
-13
lines changed

tensorflow_datasets/core/example_parser.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,6 @@ def _features_to_numpy(
154154
parsed_example = {}
155155
feature_map = features.feature
156156
for key in feature_map:
157-
ragged_row_length = _RAGGED_ROW_LENGTH_REGEX.match(key)
158157
ragged_flat_values = _RAGGED_FLAT_VALUES_REGEX.match(key)
159158
# For ragged arrays we need to reshape the np.arrays using
160159
# ragged_flat_values/ragged_row_lengths_*. `features` can look like:
@@ -173,14 +172,13 @@ def _features_to_numpy(
173172
flat_example_specs[feature_name],
174173
key,
175174
)
176-
elif ragged_row_length:
177-
# Lengths are extracted later for each feature in _feature_to_numpy.
178-
continue
179175
else:
180-
raise KeyError(
181-
f"Malformed input: {key} is found in the feature, but not in"
182-
f" {flat_example_specs}"
183-
)
176+
# Possible cases when we land here:
177+
# 1. Case ragged: Lengths are extracted later for each feature in
178+
# _feature_to_numpy. So we continue.
179+
# 2. Other case: a key was found in the feature, but not in the specs. We
180+
# just ignore this feature and continue.
181+
continue
184182
return parsed_example
185183

186184

tensorflow_datasets/core/example_parser_test.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,8 @@ def test_key_error_exception_if_example_specs_is_malformed():
116116
).SerializeToString()
117117
example_specs = features.get_tensor_info()
118118
example_parser_np = example_parser.ExampleParserNp(example_specs)
119-
with pytest.raises(
120-
KeyError,
121-
match='(.|\n)*array_of_ints is found in the feature, but not in*',
122-
):
123-
example_parser_np.parse_example(serialized_example)
119+
deserialized_example = example_parser_np.parse_example(serialized_example)
120+
np.testing.assert_equal(deserialized_example, {'doesnotexist': None})
124121

125122

126123
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)