Skip to content

Commit 542c75c

Browse files
authored
Merge pull request #85 from ArcanaFramework/typed-collection
Added `TypedCollection` base class
2 parents f5bf5e6 + 233d079 commit 542c75c

File tree

10 files changed

+270
-109
lines changed

10 files changed

+270
-109
lines changed

fileformats/core/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from ._version import __version__
22
from .classifier import Classifier
33
from .datatype import DataType
4-
from .fileset import FileSet, MockMixin
4+
from .mock import MockMixin
5+
from .fileset import FileSet
56
from .field import Field
67
from .identification import (
78
to_mime,

fileformats/core/collection.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import typing as ty
from abc import ABCMeta, abstractmethod, abstractproperty
from pathlib import Path

from fileformats.core import FileSet, validated_property, mtime_cached_property
from fileformats.core.decorators import classproperty
from fileformats.core.exceptions import FormatMismatchError
from fileformats.core.utils import get_optional_type
8+
9+
10+
class TypedCollection(FileSet, metaclass=ABCMeta):
    """Base class for collections of file-sets of specific types, either in a directory
    or a collection of file paths

    Sub-classes provide the candidate paths via ``content_fspaths`` and declare the
    expected types of the contents in ``content_types``.
    """

    # Types of the file-sets expected in the collection. Entries wrapped in
    # ``ty.Optional`` are attempted when loading ``contents`` but are not included
    # in ``required_content_types``
    content_types: ty.Tuple[
        ty.Union[ty.Type[FileSet], ty.Type[ty.Optional[FileSet]]], ...
    ] = ()

    @property
    @abstractmethod
    def content_fspaths(self) -> ty.Iterable[Path]:
        """The file-system paths that are candidates for the collection's contents"""
        ...  # noqa: E704

    @mtime_cached_property
    def contents(self) -> ty.List[FileSet]:
        """File-sets instantiated from the content paths

        Every potential content type is tried against every content path; the
        combinations that raise ``FormatMismatchError`` are skipped.

        Returns
        -------
        list[FileSet]
            the successfully instantiated file-sets, grouped by content type in the
            order they appear in ``potential_content_types``
        """
        # Resolve the paths once instead of once per content type
        fspaths = list(self.content_fspaths)
        contents: ty.List[FileSet] = []
        for content_type in self.potential_content_types:
            for fspath in fspaths:
                try:
                    contents.append(content_type([fspath], **self._load_kwargs))
                except FormatMismatchError:
                    continue
        return contents

    @validated_property
    def _validate_required_content_types(self) -> None:
        """Checks that every required content type matches at least one content path

        Raises
        ------
        FormatMismatchError
            if one or more of the required content types don't match any of the
            content paths
        """
        not_found = set(self.required_content_types)
        if not not_found:
            return
        for fspath in self.content_fspaths:
            # Iterate over a copy as matched types are removed from the set
            for content_type in list(not_found):
                if content_type.matches(fspath):
                    not_found.remove(content_type)
                    if not not_found:
                        return
        raise FormatMismatchError(
            f"Did not find the required content types, {not_found}, in {self}"
        )

    @classproperty
    def potential_content_types(cls) -> ty.Tuple[ty.Type[FileSet], ...]:
        """All content types of the collection, with any ``Optional`` wrappers
        stripped"""
        content_types: ty.List[ty.Type[FileSet]] = []
        for content_type in cls.content_types:  # type: ignore[assignment]
            content_types.append(get_optional_type(content_type))  # type: ignore[arg-type]
        return tuple(content_types)

    @classproperty
    def required_content_types(cls) -> ty.Tuple[ty.Type[FileSet], ...]:
        """The content types that are not wrapped in a generic alias such as
        ``Optional`` (i.e. those for which ``ty.get_origin`` returns None)"""
        content_types: ty.List[ty.Type[FileSet]] = []
        for content_type in cls.content_types:  # type: ignore[assignment]
            if ty.get_origin(content_type) is None:
                content_types.append(content_type)  # type: ignore[arg-type]
        return tuple(content_types)

    @classproperty
    def unconstrained(cls) -> bool:
        """Whether the file-format is unconstrained by extension, magic number or another
        constraint"""
        return super().unconstrained and not cls.content_types

fileformats/core/fileset.py

Lines changed: 1 addition & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from .datatype import DataType
4343
from .extras import extra
4444
from .fs_mount_identifier import FsMountIdentifier
45+
from .mock import MockMixin
4546

4647
if ty.TYPE_CHECKING:
4748
from pydra.engine.task import TaskBase
@@ -1742,44 +1743,3 @@ def _new_copy_path(
17421743
_formats_by_name: ty.Optional[ty.Dict[str, ty.Set[ty.Type["FileSet"]]]] = None
17431744
_required_props: ty.Optional[ty.Tuple[str, ...]] = None
17441745
_valid_class: ty.Optional[bool] = None
1745-
1746-
1747-
class MockMixin:
1748-
"""Strips out validation methods of a class, allowing it to be mocked in a way that
1749-
still satisfies type-checking"""
1750-
1751-
def __init__(
1752-
self,
1753-
fspaths: FspathsInputType,
1754-
metadata: ty.Union[ty.Dict[str, ty.Any], bool, None] = False,
1755-
):
1756-
self.fspaths = fspaths_converter(fspaths)
1757-
self._metadata = metadata
1758-
1759-
@classproperty
1760-
def type_name(cls) -> str:
1761-
return cls.mocked.type_name
1762-
1763-
def __bytes_repr__(self, cache: ty.Dict[str, ty.Any]) -> ty.Iterable[bytes]:
1764-
yield from (str(fspath).encode() for fspath in self.fspaths)
1765-
1766-
@classproperty
1767-
def mocked(cls) -> FileSet:
1768-
"""The "true" class that the mocked class is based on"""
1769-
return next(c for c in cls.__mro__ if not issubclass(c, MockMixin)) # type: ignore[no-any-return, attr-defined]
1770-
1771-
@classproperty
1772-
def namespace(cls) -> str:
1773-
"""The "namespace" the format belongs to under the "fileformats" umbrella
1774-
namespace"""
1775-
mro: ty.Tuple[ty.Type] = cls.__mro__ # type: ignore
1776-
for base in mro:
1777-
if issubclass(base, MockMixin):
1778-
continue
1779-
try:
1780-
return base.namespace # type: ignore
1781-
except FormatDefinitionError:
1782-
pass
1783-
raise FormatDefinitionError(
1784-
f"None of of the bases classes of {cls} ({mro}) have a valid namespace"
1785-
)

fileformats/core/mixin.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import logging
55
from .datatype import DataType
66
import fileformats.core
7-
from .utils import describe_task, matching_source
7+
from .utils import describe_task, matching_source, get_optional_type
88
from .decorators import validated_property, classproperty
99
from .identification import to_mime_format_name
1010
from .converter_helpers import SubtypeVar, ConverterSpec
@@ -292,6 +292,7 @@ def my_func(file: MyFormatWithClassifiers[Integer]):
292292
# Default values for class attrs
293293
multiple_classifiers = True
294294
allowed_classifiers: ty.Optional[ty.Tuple[ty.Type[Classifier], ...]] = None
295+
allow_optional_classifiers = False
295296
exclusive_classifiers: ty.Tuple[ty.Type[Classifier], ...] = ()
296297
ordered_classifiers = False
297298
generically_classifiable = False
@@ -320,7 +321,9 @@ def wildcard_classifiers(
320321
) -> ty.FrozenSet[ty.Type[SubtypeVar]]:
321322
if classifiers is None:
322323
classifiers = cls.classifiers if cls.is_classified else ()
323-
return frozenset(t for t in classifiers if issubclass(t, SubtypeVar))
324+
return frozenset(
325+
t for t in classifiers if issubclass(get_optional_type(t), SubtypeVar) # type: ignore[misc]
326+
)
324327

325328
@classmethod
326329
def non_wildcard_classifiers(
@@ -329,7 +332,9 @@ def non_wildcard_classifiers(
329332
if classifiers is None:
330333
classifiers = cls.classifiers if cls.is_classified else ()
331334
assert classifiers is not None
332-
return frozenset(q for q in classifiers if not issubclass(q, SubtypeVar))
335+
return frozenset(
336+
q for q in classifiers if not issubclass(get_optional_type(q), SubtypeVar)
337+
)
333338

334339
@classmethod
335340
def __class_getitem__(
@@ -341,11 +346,15 @@ def __class_getitem__(
341346
classifiers_tuple = tuple(classifiers)
342347
else:
343348
classifiers_tuple = (classifiers,)
349+
classifiers_to_check = tuple(
350+
get_optional_type(c, cls.allow_optional_classifiers)
351+
for c in classifiers_tuple
352+
)
344353

345354
if cls.allowed_classifiers:
346355
not_allowed = [
347356
q
348-
for q in classifiers_tuple
357+
for q in classifiers_to_check
349358
if not any(issubclass(q, t) for t in cls.allowed_classifiers)
350359
]
351360
if not_allowed:
@@ -357,15 +366,17 @@ def __class_getitem__(
357366
if cls.multiple_classifiers:
358367
if not cls.ordered_classifiers:
359368
# Check for duplicate classifiers in the multiple list
360-
if len(classifiers_tuple) > 1:
369+
if len(classifiers_to_check) > 1:
361370
# Sort the classifiers into categories and ensure that there aren't more
362371
# than one type for each category. Otherwise, if the classifier doesn't
363372
# belong to a category, check to see that there aren't multiple sub-classes
364373
# in the classifier set
365374
repetitions: ty.Dict[
366375
ty.Type[Classifier], ty.List[ty.Type[Classifier]]
367-
] = {c: [] for c in cls.exclusive_classifiers + classifiers_tuple}
368-
for classifier in classifiers_tuple:
376+
] = {
377+
c: [] for c in cls.exclusive_classifiers + classifiers_to_check
378+
}
379+
for classifier in classifiers_to_check:
369380
for exc_classifier in repetitions:
370381
if issubclass(classifier, exc_classifier):
371382
repetitions[exc_classifier].append(classifier)
@@ -381,7 +392,10 @@ def __class_getitem__(
381392
)
382393
)
383394
classifiers_tuple = tuple(
384-
sorted(set(classifiers_tuple), key=lambda x: x.__name__)
395+
sorted(
396+
set(classifiers_tuple),
397+
key=lambda x: get_optional_type(x).__name__,
398+
)
385399
)
386400
else:
387401
if len(classifiers_tuple) > 1:
@@ -428,7 +442,9 @@ def __class_getitem__(
428442
class_attrs[cls.classifiers_attr_name] = (
429443
classifiers_tuple if cls.multiple_classifiers else classifiers_tuple[0]
430444
)
431-
classifier_names = [t.__name__ for t in classifiers_tuple]
445+
classifier_names = [
446+
get_optional_type(t).__name__ for t in classifiers_tuple
447+
]
432448
if not cls.ordered_classifiers:
433449
classifier_names.sort()
434450
classified = type(

fileformats/core/mock.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import typing as ty
2+
from .utils import (
3+
fspaths_converter,
4+
)
5+
from .decorators import classproperty
6+
from .typing import FspathsInputType
7+
from .exceptions import (
8+
FormatDefinitionError,
9+
)
10+
11+
if ty.TYPE_CHECKING:
12+
from .fileset import FileSet
13+
14+
15+
class MockMixin:
    """Strips out validation methods of a class, allowing it to be mocked in a way that
    still satisfies type-checking"""

    def __init__(
        self,
        fspaths: FspathsInputType,
        metadata: ty.Union[ty.Dict[str, ty.Any], bool, None] = False,
    ):
        """Stores the paths and metadata without performing any validation

        Parameters
        ----------
        fspaths : FspathsInputType
            the file-system paths of the mocked file-set, passed through
            ``fspaths_converter``
        metadata : dict[str, Any] or bool or None
            stored as-is in the ``_metadata`` attribute
        """
        self.fspaths = fspaths_converter(fspaths)
        self._metadata = metadata

    @classproperty
    def type_name(cls) -> str:
        """The type name of the class that is being mocked"""
        return cls.mocked.type_name

    def __bytes_repr__(self, cache: ty.Dict[str, ty.Any]) -> ty.Iterable[bytes]:
        """Yields the encoded string form of each path in ``fspaths``; ``cache`` is
        not used"""
        yield from (str(fspath).encode() for fspath in self.fspaths)

    @classproperty
    def mocked(cls) -> "FileSet":
        """The "true" class that the mocked class is based on"""
        return next(c for c in cls.__mro__ if not issubclass(c, MockMixin))  # type: ignore[no-any-return, attr-defined]

    @classproperty
    def namespace(cls) -> str:
        """The "namespace" the format belongs to under the "fileformats" umbrella
        namespace

        Raises
        ------
        FormatDefinitionError
            if none of the non-mock base classes provide a valid namespace
        """
        mro: ty.Tuple[ty.Type] = cls.__mro__  # type: ignore
        for base in mro:
            if issubclass(base, MockMixin):
                continue
            try:
                return base.namespace  # type: ignore
            except (FormatDefinitionError, AttributeError):
                # AttributeError is raised by bases that don't define "namespace" at
                # all (the MRO always ends with `object`); without catching it the
                # FormatDefinitionError below could never be reached
                continue
        raise FormatDefinitionError(
            f"None of the base classes of {cls} ({mro}) have a valid namespace"
        )

fileformats/core/utils.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from contextlib import contextmanager
1212
from .typing import FspathsInputType
1313
import fileformats.core
14+
from fileformats.core.exceptions import FormatDefinitionError
1415

1516
if ty.TYPE_CHECKING:
1617
import pydra.engine.core
@@ -228,3 +229,39 @@ def import_extras_module(klass: ty.Type["fileformats.core.DataType"]) -> ExtrasM
228229
else:
229230
extras_imported = True
230231
return ExtrasModule(extras_imported, extras_pkg, extras_pypi)
232+
233+
234+
TypeType = ty.TypeVar("TypeType", bound=ty.Type[ty.Any])


def get_optional_type(
    type_: ty.Union[TypeType, ty.Type[ty.Optional[TypeType]]], allowed: bool = True
) -> TypeType:
    """Strips the ``Optional`` wrapper from a type, if present

    Parameters
    ----------
    type_ : ty.Type
        the type to unwrap, either a plain type or ``Optional[<type>]``
    allowed : bool
        whether Optional types are allowed or not

    Returns
    -------
    ty.Type
        ``type_`` itself if it is not a generic alias, otherwise the type wrapped
        by the ``Optional``

    Raises
    ------
    FormatDefinitionError
        if ``type_`` is a generic alias and ``allowed`` is False, or if ``type_`` is a
        generic alias that isn't of the form ``Optional[<type>]``
    """
    if ty.get_origin(type_) is None:
        return type_  # type: ignore[return-value]
    if not allowed:
        raise FormatDefinitionError(
            f"Optional types are not allowed in content_type definitions ({type_}) "
            "in this context"
        )
    args = ty.get_args(type_)
    # ty.get_args represents the "None" member of an Optional as NoneType, so the
    # comparisons must be made against the type rather than the None singleton
    none_type = type(None)
    if len(args) != 2 or none_type not in args:
        raise FormatDefinitionError(
            "Only Optional types are allowed in content_type definitions, "
            f"not {type_}"
        )
    return args[0] if args[0] is not none_type else args[1]  # type: ignore[no-any-return]

0 commit comments

Comments
 (0)