Skip to content

Commit e55ea31

Browse files
authored
Merge pull request #72 from ArcanaFramework/mtime-caching
Add mtime_cached_property for convenient caching of properties until/unless the mtimes of the files change
2 parents a9a3715 + 53e50d3 commit e55ea31

File tree

12 files changed

+180
-58
lines changed

12 files changed

+180
-58
lines changed
11 KB
Loading

extras/fileformats/extras/application/archive.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,4 +238,4 @@ def relative_path(path: PathType, base_dir: PathType) -> str:
238238
f"Cannot add {path} to archive as it is not a "
239239
f"subdirectory of {base_dir}"
240240
)
241-
return relpath # type: ignore[no-any-return]
241+
return relpath

extras/fileformats/extras/application/serialization.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def convert_data_serialization(
2424
output_path = out_dir / (
2525
in_file.fspath.stem + (output_format.ext if output_format.ext else "")
2626
)
27-
return output_format.save_new(output_path, dct) # type: ignore[no-any-return]
27+
return output_format.save_new(output_path, dct)
2828

2929

3030
@extra_implementation(DataSerialization.load)

extras/fileformats/extras/image/converters.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@ def convert_image(
2525
output_path = out_dir / (
2626
in_file.fspath.stem + (output_format.ext if output_format.ext else "")
2727
)
28-
return output_format.save_new(output_path, data_array) # type: ignore[no-any-return]
28+
return output_format.save_new(output_path, data_array)

fileformats/core/fileset.py

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from fileformats.core.typing import Self
1818
from .utils import (
1919
classproperty,
20+
mtime_cached_property,
2021
fspaths_converter,
2122
describe_task,
2223
matching_source,
@@ -67,7 +68,10 @@ class FileSet(DataType):
6768
a set of file-system paths pointing to all the resources in the file-set
6869
metadata : dict[str, Any]
6970
metadata associated with the file-set, typically lazily loaded via `read_metadata`
70-
extra hook
71+
extra hook but can be provided directly at the time of instantiation
72+
metadata_keys : list[str]
73+
the keys of the metadata to load when the `metadata` property is called. Provided
74+
to allow for selective loading of metadata fields for performance reasons.
7175
"""
7276

7377
# Class attributes
@@ -87,14 +91,17 @@ class FileSet(DataType):
8791

8892
# Member attributes
8993
fspaths: ty.FrozenSet[Path]
90-
_metadata: ty.Union[ty.Mapping[str, ty.Any], bool, None]
94+
_explicit_metadata: ty.Optional[ty.Mapping[str, ty.Any]]
95+
_metadata_keys: ty.Optional[ty.List[str]]
9196

9297
def __init__(
9398
self,
9499
fspaths: FspathsInputType,
95-
metadata: ty.Union[ty.Dict[str, ty.Any], bool, None] = False,
100+
metadata: ty.Optional[ty.Dict[str, ty.Any]] = None,
101+
metadata_keys: ty.Optional[ty.List[str]] = None,
96102
):
97-
self._metadata = metadata
103+
self._explicit_metadata = metadata
104+
self._metadata_keys = metadata_keys
98105
self._validate_class()
99106
self.fspaths = fspaths_converter(fspaths)
100107
self._validate_fspaths()
@@ -179,6 +186,18 @@ def relative_fspaths(self) -> ty.Iterator[Path]:
179186
"Paths for all top-level paths in the file-set relative to the common parent directory"
180187
return (p.relative_to(self.parent) for p in self.fspaths)
181188

189+
@property
190+
def mtimes(self) -> ty.Tuple[ty.Tuple[str, float], ...]:
191+
"""Modification times of all fspaths in the file-set
192+
193+
Returns
194+
-------
195+
tuple[tuple[str, float], ...]
196+
a tuple of tuples containing the file paths and the modification time sorted
197+
by the file path
198+
"""
199+
return tuple((str(p), p.stat().st_mtime) for p in sorted(self.fspaths))
200+
182201
@classproperty
183202
def mime_type(cls) -> str:
184203
"""Generates a MIME type (IANA) identifier from a format class. If an official
@@ -225,41 +244,22 @@ def possible_exts(cls) -> ty.List[ty.Optional[str]]:
225244
pass
226245
return possible
227246

228-
@property
247+
@mtime_cached_property
229248
def metadata(self) -> ty.Mapping[str, ty.Any]:
230249
"""Lazily load metadata from `read_metadata` extra if implemented, returning an
231250
empty metadata array if not"""
232-
if self._metadata is not False:
233-
assert self._metadata is not True
234-
return self._metadata if self._metadata else {}
251+
if self._explicit_metadata is not None:
252+
return self._explicit_metadata
235253
try:
236-
self._metadata = self.read_metadata()
254+
metadata = self.read_metadata(selected_keys=self._metadata_keys)
237255
except FileFormatsExtrasPkgUninstalledError:
238256
raise
239257
except FileFormatsExtrasPkgNotCheckedError as e:
240258
logger.warning(str(e))
241-
self._metadata = None
259+
metadata = {}
242260
except FileFormatsExtrasError:
243-
self._metadata = None
244-
return self._metadata if self._metadata else {}
245-
246-
def select_metadata(
247-
self, selected_keys: ty.Union[ty.Sequence[str], None] = None
248-
) -> None:
249-
"""Selects a subset of the metadata to be read and stored instead all available
250-
(i.e for performance reasons).
251-
252-
Parameters
253-
----------
254-
selected_keys : Union[Sequence[str], None]
255-
the keys of the values to load. If None, all values are loaded
256-
"""
257-
if not (
258-
isinstance(self._metadata, dict)
259-
and selected_keys is not None
260-
and set(selected_keys).issubset(self._metadata)
261-
):
262-
self._metadata = dict(self.read_metadata(selected_keys))
261+
metadata = {}
262+
return metadata
263263

264264
@extra
265265
def read_metadata(
@@ -672,7 +672,7 @@ def formats_by_iana_mime(cls) -> ty.Dict[str, ty.Type["FileSet"]]:
672672
"""a dictionary containing all formats by their IANA MIME type (if applicable)"""
673673
if cls._formats_by_iana_mime is None:
674674
cls._formats_by_iana_mime = {
675-
f.iana_mime: f # type: ignore
675+
f.iana_mime: f # type: ignore[misc]
676676
for f in FileSet.all_formats
677677
if f.__dict__.get("iana_mime") is not None
678678
}

fileformats/core/tests/test_metadata.py

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import typing as ty
22
import pytest
3+
import time
34
from fileformats.core import FileSet, extra_implementation
45
from fileformats.generic import File
56

@@ -21,7 +22,7 @@ def aformat_read_metadata(
2122

2223

2324
@pytest.fixture
24-
def file_with_metadata(tmp_path):
25+
def file_with_metadata_fspath(tmp_path):
2526
metadata = {
2627
"a": 1,
2728
"b": 2,
@@ -32,27 +33,46 @@ def file_with_metadata(tmp_path):
3233
fspath = tmp_path / "metadata-file.mf"
3334
with open(fspath, "w") as f:
3435
f.write("\n".join("{}:{}".format(*t) for t in metadata.items()))
35-
return FileWithMetadata(fspath)
36+
return fspath
3637

3738

38-
def test_metadata(file_with_metadata):
39+
def test_metadata(file_with_metadata_fspath):
40+
file_with_metadata = FileWithMetadata(file_with_metadata_fspath)
3941
assert file_with_metadata.metadata["a"] == "1"
4042
assert sorted(file_with_metadata.metadata) == ["a", "b", "c", "d", "e"]
4143

4244

43-
def test_select_metadata(file_with_metadata):
44-
file_with_metadata.select_metadata(["a", "b", "c"])
45+
def test_select_metadata(file_with_metadata_fspath):
46+
file_with_metadata = FileWithMetadata(
47+
file_with_metadata_fspath, metadata_keys=["a", "b", "c"]
48+
)
4549
assert file_with_metadata.metadata["a"] == "1"
4650
assert sorted(file_with_metadata.metadata) == ["a", "b", "c"]
4751

4852

49-
def test_select_metadata_reload(file_with_metadata):
50-
file_with_metadata.select_metadata(["a", "b", "c"])
53+
def test_explicit_metadata(file_with_metadata_fspath):
54+
file_with_metadata = FileWithMetadata(
55+
file_with_metadata_fspath,
56+
metadata={
57+
"a": 1,
58+
"b": 2,
59+
"c": 3,
60+
},
61+
)
62+
# Check that we use the explicitly provided metadata and not one from the file
63+
# contents
5164
assert sorted(file_with_metadata.metadata) == ["a", "b", "c"]
52-
# add new metadata line to check that it isn't loaded
65+
# add new metadata line to check and check that it isn't reloaded
5366
with open(file_with_metadata, "a") as f:
5467
f.write("\nf:6")
55-
file_with_metadata.select_metadata(["a", "b"])
5668
assert sorted(file_with_metadata.metadata) == ["a", "b", "c"]
57-
file_with_metadata.select_metadata(None)
69+
70+
71+
def test_metadata_reload(file_with_metadata_fspath):
72+
file_with_metadata = FileWithMetadata(file_with_metadata_fspath)
73+
assert sorted(file_with_metadata.metadata) == ["a", "b", "c", "d", "e"]
74+
# add new metadata line to check and check that it is reloaded
75+
time.sleep(2)
76+
with open(file_with_metadata, "a") as f:
77+
f.write("\nf:6")
5878
assert sorted(file_with_metadata.metadata) == ["a", "b", "c", "d", "e", "f"]

fileformats/core/tests/test_utils.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from fileformats.generic import File, Directory, FsObject
99
from fileformats.core.mixin import WithSeparateHeader
1010
from fileformats.core.exceptions import UnsatisfiableCopyModeError
11+
from fileformats.core.utils import mtime_cached_property
1112
from conftest import write_test_file
1213

1314

@@ -365,3 +366,44 @@ def test_hash_files(fsobject: FsObject, work_dir: Path, dest_dir: Path):
365366
)
366367
cpy = fsobject.copy(dest_dir)
367368
assert cpy.hash_files() == fsobject.hash_files()
369+
370+
371+
class MtimeTestFile(File):
372+
373+
flag: int
374+
375+
@mtime_cached_property
376+
def cached_prop(self):
377+
return self.flag
378+
379+
380+
def test_mtime_cached_property(tmp_path: Path):
381+
fspath = tmp_path / "file_1.txt"
382+
fspath.write_text("hello")
383+
384+
file = MtimeTestFile(fspath)
385+
386+
file.flag = 0
387+
assert file.cached_prop == 0
388+
# Need a long delay to ensure the mtime changes on Ubuntu and particularly on Windows
389+
# On MacOS, the mtime resolution is much higher so not usually an issue. Use
390+
# explicitly cache clearing if needed
391+
time.sleep(2)
392+
file.flag = 1
393+
assert file.cached_prop == 0
394+
time.sleep(2)
395+
fspath.write_text("world")
396+
assert file.cached_prop == 1
397+
398+
399+
def test_mtime_cached_property_force_clear(tmp_path: Path):
400+
fspath = tmp_path / "file_1.txt"
401+
fspath.write_text("hello")
402+
403+
file = MtimeTestFile(fspath)
404+
405+
file.flag = 0
406+
assert file.cached_prop == 0
407+
file.flag = 1
408+
MtimeTestFile.cached_prop.clear(file)
409+
assert file.cached_prop == 1

fileformats/core/typing.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,10 @@
2222
PathType: TypeAlias = ty.Union[str, Path]
2323

2424

25-
__all__ = ["CryptoMethod", "FspathsInputType", "PathType", "TypeAlias", "Self"]
25+
__all__ = [
26+
"CryptoMethod",
27+
"FspathsInputType",
28+
"PathType",
29+
"TypeAlias",
30+
"Self",
31+
]

fileformats/core/utils.py

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from types import ModuleType
77
import urllib.request
88
import urllib.error
9+
from threading import RLock
910
import os
1011
import logging
1112
import pkgutil
@@ -97,22 +98,22 @@ def fspaths_converter(fspaths: FspathsInputType) -> ty.FrozenSet[Path]:
9798

9899
PropReturn = ty.TypeVar("PropReturn")
99100

101+
102+
def classproperty(meth: ty.Callable[..., PropReturn]) -> PropReturn:
103+
"""Access a @classmethod like a @property."""
104+
# mypy doesn't understand class properties yet: https://github.com/python/mypy/issues/2563
105+
return classmethod(property(meth)) # type: ignore
106+
107+
100108
if sys.version_info[:2] < (3, 9):
101109

102-
class classproperty(object):
110+
class classproperty(object): # type: ignore[no-redef] # noqa
103111
def __init__(self, f: ty.Callable[[ty.Type[ty.Any]], ty.Any]):
104112
self.f = f
105113

106114
def __get__(self, obj: ty.Any, owner: ty.Any) -> ty.Any:
107115
return self.f(owner)
108116

109-
else:
110-
111-
def classproperty(meth: ty.Callable[..., PropReturn]) -> PropReturn:
112-
"""Access a @classmethod like a @property."""
113-
# mypy doesn't understand class properties yet: https://github.com/python/mypy/issues/2563
114-
return classmethod(property(meth)) # type: ignore
115-
116117

117118
def add_exc_note(e: Exception, note: str) -> Exception:
118119
"""Adds a note to an exception in a Python <3.11 compatible way
@@ -248,3 +249,54 @@ def import_extras_module(klass: ty.Type["fileformats.core.DataType"]) -> ExtrasM
248249
else:
249250
extras_imported = True
250251
return ExtrasModule(extras_imported, extras_pkg, extras_pypi)
252+
253+
254+
ReturnType = ty.TypeVar("ReturnType")
255+
256+
257+
class mtime_cached_property:
258+
"""A property that is cached until the mtimes of the files in the fileset are changed"""
259+
260+
def __init__(self, func: ty.Callable[..., ty.Any]):
261+
self.func = func
262+
self.__doc__ = func.__doc__
263+
self.lock = RLock()
264+
265+
@property
266+
def _cache_name(self) -> str:
267+
return f"_{self.func.__name__}_mtime_cache"
268+
269+
def clear(self, instance: "fileformats.core.FileSet") -> None:
270+
"""Forcibly clear the cache"""
271+
del instance.__dict__[self._cache_name]
272+
273+
def __get__(
274+
self,
275+
instance: ty.Optional["fileformats.core.FileSet"],
276+
owner: ty.Optional[ty.Type["fileformats.core.FileSet"]] = None,
277+
) -> ty.Any:
278+
if instance is None: # if accessing property from class not instance
279+
return self
280+
assert isinstance(instance, fileformats.core.FileSet), (
281+
"Cannot use mtime_cached_property instance with "
282+
f"{type(instance).__name__!r} object, only FileSet objects."
283+
)
284+
try:
285+
mtimes, value = instance.__dict__[self._cache_name]
286+
except KeyError:
287+
pass
288+
else:
289+
if instance.mtimes == mtimes:
290+
return value
291+
with self.lock:
292+
# check if another thread filled cache while we awaited lock
293+
try:
294+
mtimes, value = instance.__dict__[self._cache_name]
295+
except KeyError:
296+
pass
297+
else:
298+
if instance.mtimes == mtimes:
299+
return value
300+
value = self.func(instance)
301+
instance.__dict__[self._cache_name] = (instance.mtimes, value)
302+
return value

fileformats/generic/file.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
FormatMismatchError,
66
UnconstrainedExtensionException,
77
)
8-
from fileformats.core.utils import classproperty
8+
from fileformats.core.utils import classproperty, mtime_cached_property
99
from .fsobject import FsObject
1010

1111

@@ -79,7 +79,7 @@ def copy_ext(
7979
)
8080
return Path(new_path).with_suffix(suffix)
8181

82-
@property
82+
@mtime_cached_property
8383
def contents(self) -> ty.Union[str, bytes]:
8484
return self.read_contents()
8585

@@ -125,7 +125,7 @@ def actual_ext(self) -> str:
125125
"(i.e. matches the None extension)"
126126
)
127127
# Return the longest matching extension, useful for optional extensions
128-
return sorted(matching, key=len)[-1] # type: ignore[no-any-return]
128+
return sorted(matching, key=len)[-1]
129129

130130
@property
131131
def stem(self) -> str:

0 commit comments

Comments
 (0)