Skip to content

Commit f5bf5e6

Browse files
authored
Merge pull request #84 from ArcanaFramework/dicom-metadata
Cleans up the reading of DICOM metadata and returns a dictionary rather than a DICOM object
2 parents 3abc44d + 74923c2 commit f5bf5e6

21 files changed

+257
-161
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,6 @@ repos:
3030
rev: v1.11.2
3131
hooks:
3232
- id: mypy
33-
args:
34-
[
35-
--strict,
36-
--install-types,
37-
--non-interactive,
38-
]
33+
args: [--strict, --install-types, --non-interactive, --no-warn-unused-ignores]
3934
exclude: tests
40-
additional_dependencies: [pytest, attrs, imageio]
35+
additional_dependencies: [pytest, attrs, imageio, pydicom]

docs/source/developer/extensions.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ the :class:`.WithSeparateHeader` mixin.
8686
ext = ".hdr"
8787
8888
def load(self):
89-
return dict(ln.split(":") for ln in self.contents.splitlines())
89+
return dict(ln.split(":") for ln in self.raw_contents.splitlines())
9090
9191
class MyFormatWithHeader(WithSeparateHeader, File):
9292
ext = ".myh"
Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,28 @@
11
import typing as ty
22
from pathlib import Path
3-
import pydicom
3+
from typing_extensions import TypeAlias
4+
import pydicom.tag
45
from fileformats.core import FileSet, extra_implementation
56
from fileformats.application import Dicom
67
import medimages4tests.dummy.dicom.mri.t1w.siemens.skyra.syngo_d13c
78
from fileformats.core import SampleFileGenerator
89

10+
TagListType: TypeAlias = ty.Union[
11+
ty.List[int],
12+
ty.List[str],
13+
ty.List[ty.Tuple[int, int]],
14+
ty.List[pydicom.tag.BaseTag],
15+
]
16+
917

1018
@extra_implementation(FileSet.read_metadata)
1119
def dicom_read_metadata(
1220
dicom: Dicom,
13-
specific_tags: ty.Optional[ty.Collection[str]] = None,
21+
metadata_keys: ty.Optional[TagListType] = None,
1422
**kwargs: ty.Any,
1523
) -> ty.Mapping[str, ty.Any]:
16-
dcm = pydicom.dcmread(
17-
dicom.fspath,
18-
specific_tags=list(specific_tags if specific_tags is not None else []),
19-
)
20-
[getattr(dcm, a, None) for a in dir(dcm)] # Ensure all keywords are set
21-
metadata = {
22-
e.keyword: e.value
23-
for e in dcm.elements()
24-
if isinstance(e, pydicom.DataElement)
25-
and getattr(e, "keyword", False)
26-
and e.keyword != "PixelData"
27-
}
28-
return metadata
24+
dcm = pydicom.dcmread(dicom.fspath, specific_tags=metadata_keys)
25+
return Dicom.pydicom_to_dict(dcm)
2926

3027

3128
@extra_implementation(FileSet.generate_sample_data)
@@ -38,3 +35,22 @@ def dicom_generate_sample_data(
3835
out_dir=generator.dest_dir
3936
).iterdir()
4037
)
38+
39+
40+
@extra_implementation(FileSet.load)
41+
def dicom_load(
42+
dicom: Dicom,
43+
specific_tags: ty.Optional[TagListType] = None,
44+
**kwargs: ty.Any,
45+
) -> pydicom.FileDataset:
46+
return pydicom.dcmread(dicom.fspath, specific_tags=specific_tags)
47+
48+
49+
@extra_implementation(FileSet.save)
50+
def dicom_save(
51+
dicom: Dicom,
52+
data: pydicom.FileDataset,
53+
write_like_original: bool = False,
54+
**kwargs: ty.Any,
55+
) -> None:
56+
pydicom.dcmwrite(dicom.fspath, data, write_like_original=write_like_original)

extras/fileformats/extras/application/serialization.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import yaml
55
import pydra.mark
66
import pydra.engine.specs
7-
from fileformats.core import converter, extra_implementation
7+
from fileformats.core import FileSet, converter, extra_implementation
88
from fileformats.application import TextSerialization, Json, Yaml
99
from fileformats.application.serialization import SerializationType
1010

@@ -27,14 +27,18 @@ def convert_data_serialization(
2727
return output_format.new(output_path, dct)
2828

2929

30-
@extra_implementation(TextSerialization.load)
31-
def yaml_load(yml: Yaml) -> SerializationType:
30+
@extra_implementation(FileSet.load)
31+
def yaml_load(yml: Yaml, **kwargs: ty.Any) -> SerializationType:
3232
with open(yml.fspath) as f:
3333
data = yaml.load(f, Loader=yaml.Loader)
3434
return data # type: ignore[no-any-return]
3535

3636

37-
@extra_implementation(TextSerialization.save)
38-
def yaml_save(yml: Yaml, data: SerializationType) -> None:
37+
@extra_implementation(FileSet.save)
38+
def yaml_save(
39+
yml: Yaml,
40+
data: SerializationType,
41+
**kwargs: ty.Any,
42+
) -> None:
3943
with open(yml.fspath, "w") as f:
40-
yaml.dump(data, f)
44+
yaml.dump(data, f, **kwargs)
Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,18 @@
1+
import pytest
12
from fileformats.application import Dicom
23

34

45
def test_dicom_metadata():
56

67
dicom = Dicom.sample()
78

8-
assert dicom.metadata["EchoTime"] == "2.07"
9+
assert dicom.metadata["EchoTime"] == 2.07
10+
11+
12+
def test_dicom_metadata_with_specific_tags():
13+
14+
dicom = Dicom(Dicom.sample(), metadata_keys=["EchoTime"])
15+
16+
assert dicom.metadata["EchoTime"] == 2.07
17+
with pytest.raises(KeyError):
18+
dicom.metadata["PatientName"]

extras/fileformats/extras/application/tests/test_application_serialization.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,10 @@
33
from fileformats.application import Json, Yaml
44

55

6-
SAMPLE_JSON = """{
7-
"a": "string field",
8-
"alist": [0, 1, 2, 3, 4, 5],
9-
"anesteddict": {
10-
"x": null,
11-
"y": [],
12-
"z": 42.0
13-
}
14-
}"""
6+
SAMPLE_JSON = (
7+
"""{"a": "string field", "alist": [0, 1, 2, 3, 4, 5], """
8+
""""anesteddict": {"x": null, "y": [], "z": 42.0}}"""
9+
)
1510

1611
SAMPLE_YAML = """a: string field
1712
alist:
@@ -38,7 +33,7 @@ def test_json_to_yaml(work_dir):
3833
f.write(SAMPLE_JSON)
3934
jsn = Json(in_file)
4035
yml = Yaml.convert(jsn)
41-
assert yml.contents == SAMPLE_YAML
36+
assert yml.raw_contents == SAMPLE_YAML
4237

4338

4439
# @pytest.mark.xfail(
@@ -50,5 +45,5 @@ def test_yaml_to_json(work_dir):
5045
with open(in_file, "w") as f:
5146
f.write(SAMPLE_JSON)
5247
yml = Yaml(in_file)
53-
Json.convert(yml)
54-
assert yml.contents == SAMPLE_JSON
48+
jsn = Json.convert(yml)
49+
assert jsn.raw_contents == SAMPLE_JSON
Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
import imageio
2+
import typing as ty
23
import numpy # noqa: F401
34
import typing # noqa: F401
4-
from fileformats.core import extra_implementation
5+
from fileformats.core import FileSet, extra_implementation
56
from fileformats.image.raster import RasterImage, DataArrayType
67

78

8-
@extra_implementation(RasterImage.load)
9-
def read_raster_data(image: RasterImage) -> DataArrayType:
9+
@extra_implementation(FileSet.load)
10+
def read_raster_data(image: RasterImage, **kwargs: ty.Any) -> DataArrayType:
1011
return imageio.imread(image.fspath) # type: ignore
1112

1213

13-
@extra_implementation(RasterImage.save)
14-
def write_raster_data(image: RasterImage, data: DataArrayType) -> None:
15-
imageio.imwrite(image.fspath, data)
14+
@extra_implementation(FileSet.save)
15+
def write_raster_data(
16+
image: RasterImage, data: DataArrayType, **kwargs: ty.Any
17+
) -> None:
18+
imageio.imwrite(image.fspath, data, **kwargs)

fileformats/application/medical.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1+
import typing as ty
12
from fileformats.generic import BinaryFile
23
from fileformats.core.mixin import WithMagicNumber
34

5+
if ty.TYPE_CHECKING:
6+
import pydicom
7+
48

59
class Dicom(WithMagicNumber, BinaryFile):
610

@@ -10,3 +14,57 @@ class Dicom(WithMagicNumber, BinaryFile):
1014
binary = True
1115

1216
alternate_exts = (".dcm",) # dcm is recommended not required
17+
18+
@classmethod
19+
def pydicom_to_dict(
20+
cls, dcm: "pydicom.Dataset", omit: ty.Collection[str] = ("PixelData",)
21+
) -> ty.Dict[str, ty.Any]:
22+
"""Convert a pydicom Dataset to a dictionary.
23+
24+
Parameters
25+
----------
26+
dcm : pydicom.Dataset
27+
The pydicom Dataset to convert.
28+
omit : Collection[str], optional
29+
A collection of keys to omit from the dictionary, by default ("PixelData",)
30+
31+
Returns
32+
-------
33+
Dict[str, Any]
34+
The dictionary representation of the pydicom Dataset
35+
"""
36+
import pydicom.dataset
37+
import pydicom.valuerep
38+
import pydicom.multival
39+
import pydicom.uid
40+
41+
# Ensure that all keys are loaded before creating the dictionary, otherwise the keywords
42+
# will not be set on the elements
43+
[getattr(dcm, attr, None) for attr in dir(dcm)]
44+
dct: ty.Dict[str, ty.Any] = {}
45+
for elem in dcm.values():
46+
try:
47+
key = elem.keyword # type: ignore[union-attr, attr-defined]
48+
except AttributeError:
49+
key = None
50+
if not key:
51+
key = elem.tag.json_key # type: ignore[attr-defined]
52+
if key not in omit:
53+
value = elem.value # type: ignore[attr-defined]
54+
if isinstance(value, pydicom.multival.MultiValue):
55+
value = [str(v) for v in value]
56+
elif isinstance(value, pydicom.uid.UID):
57+
value = str(value)
58+
elif isinstance(value, bytes):
59+
value = value.decode(errors="ignore")
60+
elif isinstance(value, pydicom.dataset.Dataset):
61+
value = cls.pydicom_to_dict(value, omit)
62+
elif isinstance(value, pydicom.valuerep.IS):
63+
value = int(value)
64+
elif isinstance(value, pydicom.valuerep.DSfloat):
65+
value = float(value)
66+
# Can be handy to be able to access family_name and given_name separately
67+
# elif isinstance(value, pydicom.valuerep.PersonName):
68+
# value = str(value)
69+
dct[key] = value
70+
return dct

fileformats/application/serialization.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,16 +103,16 @@ def generate_yaml_sample_data(
103103

104104

105105
@extra_implementation(FileSet.load)
106-
def load(jsn: Json) -> SerializationType:
106+
def load(jsn: Json, **kwargs: ty.Any) -> SerializationType:
107107
try:
108108
with jsn.open() as f:
109-
dct: ty.Dict[str, ty.Any] = json.load(f)
109+
dct: ty.Dict[str, ty.Any] = json.load(f, **kwargs)
110110
except json.JSONDecodeError as e:
111111
raise FormatMismatchError(f"'{jsn.fspath}' is not a valid JSON file") from e
112112
return dct
113113

114114

115115
@extra_implementation(FileSet.save)
116-
def save(jsn: Json, data: SerializationType) -> None:
116+
def save(jsn: Json, data: SerializationType, **kwargs: ty.Any) -> None:
117117
with jsn.open("w") as f:
118-
json.dump(data, f)
118+
json.dump(data, f, **kwargs)

0 commit comments

Comments
 (0)