Skip to content

Commit 98c89e9

Browse files
authored
Merge pull request #59 from ArcanaFramework/mount-identification
FS mount identification
2 parents 37a45d2 + 084ff3e commit 98c89e9

File tree

15 files changed

+973
-536
lines changed

15 files changed

+973
-536
lines changed

extras/fileformats/extras/application/medical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from fileformats.core import FileSet
55
from fileformats.application import Dicom
66
import medimages4tests.dummy.dicom.mri.t1w.siemens.skyra.syngo_d13c
7-
from fileformats.core.utils import SampleFileGenerator
7+
from fileformats.core import SampleFileGenerator
88

99

1010
@FileSet.read_metadata.register

fileformats/application/serialization.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from fileformats.core.mixin import WithClassifiers
66
from ..generic import File
77
from fileformats.core.exceptions import FormatMismatchError
8-
from fileformats.core.utils import SampleFileGenerator
8+
from fileformats.core import SampleFileGenerator
99

1010

1111
class Schema(DataType):

fileformats/core/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
from .datatype import DataType
44
from .fileset import FileSet, MockMixin
55
from .field import Field
6-
from .utils import (
6+
from .identification import (
77
to_mime,
88
from_mime,
99
find_matching,
1010
from_paths,
1111
)
12+
from .sampling import SampleFileGenerator

fileformats/core/datatype.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
classproperty,
1414
subpackages,
1515
add_exc_note,
16+
)
17+
from .identification import (
1618
to_mime_format_name,
1719
from_mime_format_name,
1820
IANA_MIME_TYPE_REGISTRIES,
@@ -125,7 +127,7 @@ def from_mime(cls, mime_string):
125127
except ValueError:
126128
raise FormatRecognitionError(
127129
f"Format '{mime_string}' is not a valid MIME-like format of <namespace>/<format>"
128-
)
130+
) from None
129131
else:
130132
namespace = namespace.replace("-", "_")
131133
# Attempt to load file type using their `iana_mime` attribute

fileformats/core/fileset.py

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@
1616
from .utils import (
1717
classproperty,
1818
fspaths_converter,
19-
to_mime_format_name,
20-
IANA_MIME_TYPE_REGISTRIES,
2119
describe_task,
2220
matching_source,
2321
import_extras_module,
24-
SampleFileGenerator,
22+
)
23+
from .sampling import SampleFileGenerator
24+
from .identification import (
25+
to_mime_format_name,
26+
IANA_MIME_TYPE_REGISTRIES,
2527
)
2628
from .converter import SubtypeVar
2729
from .classifier import Classifier
@@ -36,6 +38,8 @@
3638
)
3739
from .datatype import DataType
3840
from . import hook
41+
from .fs_mount_identifier import FsMountIdentifier
42+
3943

4044
try:
4145
from typing import Self
@@ -1181,6 +1185,7 @@ class CopyMode(Enum):
11811185

11821186
# All other combinations (typically the result of bit-masking)
11831187

1188+
leave_or_copy = 0b1001
11841189
leave_or_symlink = 0b0011
11851190
leave_or_hardlink = 0b0101
11861191
leave_or_link = 0b0111
@@ -1297,20 +1302,49 @@ def copy(
12971302
if isinstance(collation, str)
12981303
else collation
12991304
)
1300-
if new_stem:
1305+
# Rule out any copy modes that are not supported given the collation mode
1306+
# and file-system mounts the paths and destination directory reside on
1307+
constraints = []
1308+
if FsMountIdentifier.on_cifs(dest_dir) and mode & self.CopyMode.symlink:
1309+
supported_modes -= self.CopyMode.symlink
1310+
constraint = (
1311+
f"Destination directory is on CIFS mount ({dest_dir}) "
1312+
"and we therefore cannot create a symlink"
1313+
)
1314+
logger.debug(constraint)
1315+
constraints.append(constraint)
1316+
not_on_same_mount = [
1317+
p for p in self.fspaths if not FsMountIdentifier.on_same_mount(p, dest_dir)
1318+
]
1319+
if not_on_same_mount and mode & self.CopyMode.hardlink:
1320+
supported_modes -= self.CopyMode.hardlink
1321+
constraint = (
1322+
f"Some paths ({', '.join(str(p) for p in not_on_same_mount)}) are on "
1323+
f"not on same file-system mount as the destination directory {dest_dir}"
1324+
"and therefore cannot be hard-linked"
1325+
)
1326+
logger.debug(constraint)
1327+
constraints.append(constraint)
1328+
if new_stem or (
1329+
collation >= self.CopyCollation.siblings
1330+
and not all(p.parent == self.parent for p in self.fspaths)
1331+
):
13011332
supported_modes -= self.CopyMode.leave
1333+
1334+
# Get the intersection of copy modes that are supported and have been requested
13021335
selected_mode = mode & supported_modes
1303-
if collation >= self.CopyCollation.siblings:
1304-
if not all(p.parent == self.parent for p in self.fspaths):
1305-
selected_mode -= self.CopyMode.leave
13061336
if not selected_mode:
1307-
raise FileFormatsError(
1308-
f"Cannot copy {self} using {mode} mode as it is not supported by "
1309-
f"the {supported_modes} given the collation specification, {collation}"
1337+
msg = (
1338+
f"Cannot copy {self} using '{mode}' mode as it is not supported by "
1339+
f"the '{supported_modes}' given the collation specification, {collation}"
13101340
)
1341+
if constraints:
1342+
msg += ", and the following constraints:\n" + "\n".join(constraints)
1343+
raise FileFormatsError(msg)
13111344
if selected_mode & self.CopyMode.leave:
13121345
return self # Don't need to do anything
13131346

1347+
# Select inner copy/link methods
13141348
if selected_mode & self.CopyMode.symlink:
13151349
copy_dir = copy_file = os.symlink
13161350
elif selected_mode & self.CopyMode.hardlink:
@@ -1339,10 +1373,12 @@ def hardlink_dir(src: Path, dest: Path):
13391373
extension_decomposition=extension_decomposition,
13401374
)
13411375

1342-
dest_dir = Path(dest_dir) # ensure a Path not a string
1376+
# Prepare destination directory
1377+
dest_dir = Path(dest_dir)
13431378
if make_dirs:
13441379
dest_dir.mkdir(parents=True, exist_ok=True)
13451380

1381+
# Iterate through the paths to copy, copying them to the destination directory
13461382
new_paths = []
13471383
for fspath in fspaths_to_copy:
13481384
new_path, fspath = self._new_copy_path(
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
import os
2+
import typing as ty
3+
from pathlib import Path
4+
import re
5+
from contextlib import contextmanager
6+
import subprocess as sp
7+
from .utils import logger
8+
9+
10+
class FsMountIdentifier:
    """Identify the file-system mount that a path resides on.

    Used to decide which features can safely be used for a given path, e.g.
    whether symlinks can be created (not on CIFS shares) or whether two paths can
    be hard-linked (only when they sit on the same mount).

    The mount table is read once from the output of the ``mount`` command and
    cached on the class. Tests can substitute it temporarily with `patch_table`.

    NB: Adapted from the ``nipype.utils.filemanip`` module,
    https://github.com/nipy/nipype
    """

    # Lazily populated cache of (mount point, file-system type) pairs. ``None``
    # means the table has not been generated yet.
    _mount_table: ty.Optional[ty.List[ty.Tuple[str, str]]] = None

    @classmethod
    def on_cifs(cls, path: os.PathLike) -> bool:
        """Check whether a file path is on a CIFS filesystem mounted in a POSIX host.

        POSIX hosts are assumed to have the ``mount`` command.

        On Windows, Docker mounts host directories into containers through CIFS
        shares, which has support for Minshall+French symlinks, or text files that
        the CIFS driver exposes to the OS as symlinks.
        We have found that under concurrent access to the filesystem, this feature
        can result in failures to create or read recently-created symlinks,
        leading to inconsistent behavior and ``FileNotFoundError`` errors.

        This check is written to support disabling symlinks on CIFS shares.

        Parameters
        ----------
        path : os.PathLike
            the file-system path to check

        Returns
        -------
        bool
            True if the innermost mount containing the path is of type "cifs"
        """
        # Compare case-insensitively, consistent with the filter applied in
        # `parse_mount_table`
        return cls.get_mount(path)[1].lower() == "cifs"

    @classmethod
    def on_same_mount(cls, path1: os.PathLike, path2: os.PathLike) -> bool:
        """Check whether two paths are on the same logical file system

        Parameters
        ----------
        path1 : os.PathLike
            the first path to compare
        path2 : os.PathLike
            the second path to compare

        Returns
        -------
        bool
            True if both paths resolve to the same mount point
        """
        return cls.get_mount(path1)[0] == cls.get_mount(path2)[0]

    @classmethod
    def get_mount(cls, path: os.PathLike) -> ty.Tuple[Path, str]:
        """Get the mount point for a given file-system path

        Parameters
        ----------
        path : os.PathLike
            the file-system path to identify the mount of. Relative paths are
            interpreted relative to the current working directory

        Returns
        -------
        mount_point : Path
            the root of the mount the path sits on
        fstype : str
            the type of the file-system (e.g. ext4 or cifs). If no entry in the
            mount table contains the path, ``(Path("/"), "ext4")`` is returned
        """
        # Normalise lexically (no symlink resolution) so that relative paths and
        # ".." segments are compared against the absolute mount points correctly
        target = Path(os.path.abspath(str(path)))
        best: ty.Optional[ty.Tuple[Path, str]] = None
        best_depth = -1
        for mount_point, fstype in cls.get_mount_table():
            root = Path(mount_point)
            if not cls._is_within(target, root):
                continue
            # Select the deepest containing mount explicitly rather than relying
            # on the table being sorted (patched tables may not be). A strict
            # comparison means the first entry wins a tie.
            depth = len(root.parts)
            if depth > best_depth:
                best, best_depth = (root, fstype), depth
        if best is None:
            return (Path("/"), "ext4")
        return best

    @classmethod
    def generate_cifs_table(cls) -> ty.List[ty.Tuple[str, str]]:
        """
        Construct a reverse-length-ordered list of mount points that fall under a
        CIFS mount.

        On systems without a ``mount`` command, or with no CIFS mounts, returns an
        empty list.

        Returns
        -------
        list[tuple[str, str]]
            (mount point, file-system type) pairs, longest mount point first
        """
        exit_code, output = sp.getstatusoutput("mount")
        return cls.parse_mount_table(exit_code, output)

    @classmethod
    def generate_mount_table(cls) -> ty.List[ty.Tuple[str, str]]:
        """
        Construct a reverse-length-ordered list of all mount points reported by
        the ``mount`` command.

        On systems without a ``mount`` command, returns an empty list.

        Returns
        -------
        list[tuple[str, str]]
            (mount point, file-system type) pairs, longest mount point first
        """
        exit_code, output = sp.getstatusoutput("mount")
        return cls.parse_mount_table(exit_code, output, cifs_only=False)

    @classmethod
    def parse_mount_table(
        cls, exit_code: int, output: str, *, cifs_only: bool = True
    ) -> ty.List[ty.Tuple[str, str]]:
        """
        Parse the output of ``mount`` to produce (path, fs_type) pairs.

        Separated from `generate_cifs_table` and `generate_mount_table` to enable
        testing the logic with real outputs.

        Parameters
        ----------
        exit_code : int
            the exit code of the ``mount`` command; non-zero yields an empty list
        output : str
            the text printed by the ``mount`` command
        cifs_only : bool, optional
            if True (the default), only return mount points that are, or are
            nested under, a CIFS mount. If False, return every parsed mount

        Returns
        -------
        list[tuple[str, str]]
            (mount point, file-system type) pairs, longest mount point first
        """
        # Not POSIX
        if exit_code != 0:
            return []

        # Linux mount example: sysfs on /sys type sysfs (rw,nosuid,nodev,noexec)
        #                          <PATH>^^^^      ^^^^^<FSTYPE>
        # OSX mount example:   /dev/disk2 on / (hfs, local, journaled)
        #                               <PATH>^  ^^^<FSTYPE>
        pattern = re.compile(r".*? on (/.*?) (?:type |\()([^\s,\)]+)")

        mount_info = []
        for line in output.strip().splitlines():
            if not line:  # ignore empty lines
                continue
            match = pattern.match(line)
            if match is None:
                # Unparseable lines are reported at debug level and skipped
                logger.debug("Cannot parse mount line: '%s'", line)
                continue
            mount_info.append(match.groups())

        # (path, fstype) tuples, sorted by path length (longest first). The sort
        # is stable, so equal-length entries keep their order from ``mount``.
        mount_info.sort(key=lambda x: len(x[0]), reverse=True)

        if not cifs_only:
            return mount_info

        cifs_roots = [
            Path(path) for path, fstype in mount_info if fstype.lower() == "cifs"
        ]
        return [
            mount
            for mount in mount_info
            if any(cls._is_within(Path(mount[0]), root) for root in cifs_roots)
        ]

    @classmethod
    def get_mount_table(cls) -> ty.List[ty.Tuple[str, str]]:
        """Return the cached mount table, generating it on first access.

        The full table (not just CIFS mounts) is cached so that `on_same_mount`
        can tell apart paths that sit on different non-CIFS mounts.
        """
        if cls._mount_table is None:
            cls._mount_table = cls.generate_mount_table()
        return cls._mount_table

    @classmethod
    @contextmanager
    def patch_table(cls, mount_table: ty.List[ty.Tuple[str, str]]):
        """Patch the mount table with new values. Used in test routines

        Parameters
        ----------
        mount_table : list[tuple[str, str]]
            the (mount point, file-system type) pairs to use inside the context.
            The previous table is restored on exit, even if an error is raised
        """
        orig_table = cls._mount_table
        cls._mount_table = list(mount_table)
        try:
            yield
        finally:
            cls._mount_table = orig_table

    @staticmethod
    def _is_within(path: Path, root: Path) -> bool:
        """Whether ``path`` is ``root`` itself or located underneath it.

        Compares whole path components, so "/mnt/data2" is not considered to be
        within "/mnt/data" (which a plain string-prefix test would wrongly accept).
        """
        return root == path or root in path.parents

0 commit comments

Comments
 (0)