Skip to content

Commit a98c507

Browse files
authored
upath._stat: add a os.stat_result compatible fsspec info wrapper (#179)
* upath._stat: add an os.stat_result compatible fsspec info wrapper
* upath._stat: add repr to UPathStatResult
* tests: add a basic size and ISDIR, ISREG test
* upath.implementations.http: fix folders being labeled as 'file' in info
* upath._stat: mention issue with HTTPPath.stat() on older fsspec as a known issue
1 parent cfa0795 commit a98c507

File tree

6 files changed

+442
-3
lines changed

6 files changed

+442
-3
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ the dependencies. The following list will be kept up to date whenever we encount
189189
- **UPath().glob()** fsspec fixed its glob behavior when handling `**` patterns in versions `fsspec>=2023.9.0`
190190
- **GCSPath().mkdir()** a few mkdir quirks are solved by installing `gcsfs>=2022.7.1`
191191
- **fsspec.filesystem(WebdavPath().protocol)** the webdav protocol was added to fsspec in version `fsspec>=2022.5.0`
192+
- **stat.S_ISDIR(HTTPPath().stat().st_mode)** requires `fsspec>=2024.2.0` to correctly return `True` for directories
192193

193194
## Contributing
194195

upath/_stat.py

Lines changed: 385 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,385 @@
1+
from __future__ import annotations
2+
3+
import os
4+
import warnings
5+
from datetime import datetime
6+
from stat import S_IFDIR
7+
from stat import S_IFLNK
8+
from stat import S_IFREG
9+
from typing import Any
10+
from typing import Iterator
11+
from typing import Mapping
12+
from typing import Sequence
13+
14+
__all__ = [
15+
"UPathStatResult",
16+
]
17+
18+
19+
def _convert_value_to_timestamp(value: Any) -> int | float:
    """Try to convert a datetime-like value to a timestamp.

    Args:
        value: A number (returned unchanged), an ISO 8601 formatted string,
            or a ``datetime`` instance.

    Returns:
        The timestamp as an int or float.

    Raises:
        TypeError: If ``value`` is none of the supported types. A
            ``RuntimeWarning`` is emitted right before raising.
        ValueError: If ``value`` is a string that ``datetime.fromisoformat``
            cannot parse.
    """
    if isinstance(value, (int, float)):
        # already a timestamp: hand it back untouched
        return value

    if isinstance(value, str):
        # rewrite a trailing "Z" as an explicit UTC offset before parsing
        if value.endswith("Z"):
            value = value[:-1] + "+00:00"
        return datetime.fromisoformat(value).timestamp()

    if isinstance(value, datetime):
        return value.timestamp()

    # Unsupported type: warn in addition to raising, so the problem is
    # still reported when a caller catches the TypeError.
    warnings.warn(
        f"Cannot convert {value!r} of type {type(value)!r} to a timestamp."
        " Please report this at: https://github.com/fsspec/universal_path/issues",
        RuntimeWarning,
        stacklevel=2,
    )
    raise TypeError(f"Cannot convert {value!r} to a timestamp.")
37+
38+
39+
def _get_stat_result_extra_fields() -> tuple[str, ...]:
    """Retrieve the extra fields of the os.stat_result class.

    Returns:
        The names of the additional named fields of ``os.stat_result``,
        ordered by their internal field index.
    """
    # Note:
    #   Every field of the probe instance is filled with its own index.
    #   The second element of the reduce arguments is then a dictionary
    #   with the additional named fields of the stat_result class as keys
    #   and the internal index of the field as value.
    probe = os.stat_result(range(os.stat_result.n_fields))
    _, (_, extra) = probe.__reduce__()
    # sort the names by the index stored for them
    return tuple(sorted(extra, key=extra.__getitem__))
49+
50+
51+
class UPathStatResult:
    """A stat_result compatible class wrapping fsspec info dicts.

    **Note**: It is unlikely that you will ever have to instantiate
    this class directly. If you want to convert an info dict,
    use: `UPathStatResult.from_info(info)`

    This object may be accessed either as a tuple of
      (mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime)
    or via the attributes st_mode, st_ino, st_dev, st_nlink, st_uid, and so on.

    There's an additional method `as_info()` for accessing the info dict.
    This is useful to access additional information provided by the file system
    implementation, that's not covered by the stat_result tuple.

    """

    __slots__ = ("_seq", "_info")
    # Note:
    #   can't derive from os.stat_result at all, and can't derive from
    #   tuple and have slots. So we duck type the os.stat_result class

    # Add the fields and "extra fields" of the os.stat_result class
    _fields = (
        "st_mode",
        "st_ino",
        "st_dev",
        "st_nlink",
        "st_uid",
        "st_gid",
        "st_size",
        "st_atime",
        "st_mtime",
        "st_ctime",
    )
    _fields_extra = _get_stat_result_extra_fields()

    # Provide the n_ attributes of the os.stat_result class for compatibility
    n_sequence_fields = len(_fields)
    n_fields = len(_fields) + len(_fields_extra)
    # names listed both as sequence field and as extra field are counted as
    # the unnamed fields; the result is validated against os.stat_result below
    n_unnamed_fields = len(set(_fields_extra).intersection(_fields))

    if (
        n_fields != os.stat_result.n_fields
        or n_sequence_fields != os.stat_result.n_sequence_fields
        or n_unnamed_fields != os.stat_result.n_unnamed_fields
    ):
        warnings.warn(
            "UPathStatResult: The assumed number of fields in the"
            " stat_result class is not correct. Got: "
            f" {_fields!r}, {_fields_extra!r}, {os.stat_result.n_fields}"
            " This might cause problems? Please report this issue at:"
            " https://github.com/fsspec/universal_path/issues",
            RuntimeWarning,
            stacklevel=2,
        )

    def __init__(
        self,
        stat_result_seq: Sequence[int],
        info_dict: Mapping[str, Any] | None = None,
    ) -> None:
        """init compatible with os.stat_result

        Use `UPathStatResult.from_info(info)` to instantiate from a fsspec info.

        Args:
            stat_result_seq: Fallback values for the sequence fields. Must
                provide at least `n_sequence_fields` and at most `n_fields`
                items. Items beyond `n_sequence_fields` are ignored and a
                `UserWarning` is emitted.
            info_dict: The fsspec info dict. Values found in it take
                precedence over the fallback values.

        Raises:
            TypeError: If `stat_result_seq` has too few or too many items.
        """
        seq = tuple(stat_result_seq)
        # bind the length on its own line: `n := len(seq) < limit` would
        # assign the result of the comparison instead of the length
        n = len(seq)
        cls_name = type(self).__name__
        if n < self.n_sequence_fields:
            raise TypeError(
                f"{cls_name} takes at least {self.n_sequence_fields}-sequence"
                f" ({n}-sequence given)"
            )
        elif n > self.n_fields:
            raise TypeError(
                f"{cls_name} takes at most {self.n_fields}-sequence"
                f" ({n}-sequence given)"
            )
        elif n > self.n_sequence_fields:
            warnings.warn(
                "UPathStatResult: The seq provided more than"
                f" {self.n_sequence_fields} items. Ignoring the extra items...",
                UserWarning,
                stacklevel=2,
            )
        self._seq = seq[: self.n_sequence_fields]
        self._info = info_dict or {}

    def __repr__(self):
        """Return a representation listing the sequence fields and the info."""
        cls_name = type(self).__name__
        seq_attrs = ", ".join(
            f"{name}={value}" for name, value in zip(self._fields, self)
        )
        return f"{cls_name}({seq_attrs}, info={self._info!r})"

    # --- access to the fsspec info dict ------------------------------

    @classmethod
    def from_info(cls, info: Mapping[str, Any]) -> UPathStatResult:
        """Create a UPathStatResult from a fsspec info dict."""
        # fill all the fallback default values with 0
        defaults = [0] * cls.n_sequence_fields
        return cls(defaults, info)

    def as_info(self) -> Mapping[str, Any]:
        """Return the fsspec info dict."""
        return self._info

    # --- private lookup helpers --------------------------------------

    def _int_from_info(self, keys: Sequence[str], seq_index: int) -> int:
        """Return the first info value under `keys` that converts to int.

        Falls back to the value stored at `seq_index` of the fallback
        sequence when no key is present or no value is convertible.
        """
        for key in keys:
            try:
                return int(self._info[key])
            except (ValueError, TypeError, KeyError):
                continue
        return self._seq[seq_index]

    def _timestamp_from_info(
        self,
        keys: Sequence[str],
        seq_index: int | None,
    ) -> int | float:
        """Return the first info value under `keys` that is a timestamp.

        Falls back to the value stored at `seq_index` of the fallback
        sequence, or to 0 when `seq_index` is None.
        """
        for key in keys:
            try:
                raw_value = self._info[key]
            except KeyError:
                continue
            try:
                return _convert_value_to_timestamp(raw_value)
            except (TypeError, ValueError):
                # not convertible: try the next candidate key
                continue
        if seq_index is None:
            return 0
        return self._seq[seq_index]

    # --- guaranteed fields -------------------------------------------

    @property
    def st_mode(self) -> int:
        """protection bits"""
        mode = self._info.get("mode")
        if isinstance(mode, int):
            return mode
        elif isinstance(mode, str):
            # string modes are interpreted as octal numbers
            try:
                return int(mode, 8)
            except ValueError:
                pass

        # no usable mode: derive the file type bits from the info "type"
        type_ = self._info.get("type")
        if type_ == "file":
            return S_IFREG  # see: stat.S_ISREG
        elif type_ == "directory":
            return S_IFDIR  # see: stat.S_ISDIR

        if self._info.get("isLink"):
            return S_IFLNK  # see: stat.S_ISLNK

        return self._seq[0]

    @property
    def st_ino(self) -> int:
        """inode"""
        ino = self._info.get("ino")
        if isinstance(ino, int):
            return ino
        return self._seq[1]

    @property
    def st_dev(self) -> int:
        """device"""
        dev = self._info.get("dev")
        if isinstance(dev, int):
            return dev
        return self._seq[2]

    @property
    def st_nlink(self) -> int:
        """number of hard links"""
        nlink = self._info.get("nlink")
        if isinstance(nlink, int):
            return nlink
        return self._seq[3]

    @property
    def st_uid(self) -> int:
        """user ID of owner"""
        return self._int_from_info(("uid", "owner", "uname", "unix.owner"), 4)

    @property
    def st_gid(self) -> int:
        """group ID of owner"""
        return self._int_from_info(("gid", "group", "gname", "unix.group"), 5)

    @property
    def st_size(self) -> int:
        """total size, in bytes"""
        return self._int_from_info(("size",), 6)

    @property
    def st_atime(self) -> int | float:
        """time of last access"""
        return self._timestamp_from_info(
            ("atime", "time", "last_accessed", "accessTime"),
            7,
        )

    @property
    def st_mtime(self) -> int | float:
        """time of last modification"""
        return self._timestamp_from_info(
            (
                "mtime",
                "LastModified",
                "last_modified",
                "timeModified",
                "modificationTime",
                "modified_at",
            ),
            8,
        )

    @property
    def st_ctime(self) -> int | float:
        """time of last change"""
        return self._timestamp_from_info(("ctime",), 9)

    # --- extra fields ------------------------------------------------

    def __getattr__(self, item):
        """Provide 0 for every extra stat_result field without a property."""
        if item in self._fields_extra:
            return 0  # fallback default value
        raise AttributeError(item)

    if "st_birthtime" in _fields_extra:

        @property
        def st_birthtime(self) -> int | float:
            """time of creation"""
            return self._timestamp_from_info(
                ("created", "creation_time", "timeCreated", "created_at"),
                None,
            )

    # --- os.stat_result tuple interface ------------------------------

    def __len__(self) -> int:
        """Return the number of sequence fields."""
        return len(self._fields)

    def __iter__(self) -> Iterator[int]:
        """the sequence interface iterates over the guaranteed fields.

        All values are integers.
        """
        for field in self._fields:
            yield int(getattr(self, field))

    def index(self, value: int, start: int = 0, stop: int | None = None, /) -> int:
        """the sequence interface index method.

        Searches the values exposed via iteration and indexing, not the
        raw fallback sequence.

        Raises:
            ValueError: If `value` is not present in the searched range.
        """
        values = tuple(self)
        if stop is None:
            stop = len(values)
        return values.index(value, start, stop)

    def count(self, value: int) -> int:
        """the sequence interface count method.

        Counts among the values exposed via iteration and indexing, not
        the raw fallback sequence.
        """
        return tuple(self).count(value)

    # --- compatibility with the fsspec info dict interface ------------

    def __getitem__(self, item: int | slice | str) -> Any:
        """Return a sequence field by index or slice, or an info value by key.

        String keys are looked up in the fsspec info dict and emit a
        `DeprecationWarning`. Slices return a tuple of integer values.
        """
        if isinstance(item, str):
            warnings.warn(
                "Access the fsspec info via `.as_info()[key]`",
                DeprecationWarning,
                stacklevel=2,
            )
            return self._info[item]
        if isinstance(item, slice):
            return tuple(self)[item]
        # we need to go via the attributes and cast to int
        attr = self._fields[item]
        return int(getattr(self, attr))

    def keys(self):
        """compatibility with the fsspec info dict interface."""
        warnings.warn(
            "Access the fsspec info via `.as_info().keys()`",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._info.keys()

    def values(self):
        """compatibility with the fsspec info dict interface."""
        warnings.warn(
            "Access the fsspec info via `.as_info().values()`",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._info.values()

    def items(self):
        """compatibility with the fsspec info dict interface."""
        warnings.warn(
            "Access the fsspec info via `.as_info().items()`",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._info.items()

    def get(self, key, default=None):
        """compatibility with the fsspec info dict interface."""
        warnings.warn(
            "Access the fsspec info via `.as_info().get(key, default)`",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._info.get(key, default)

    def copy(self):
        """compatibility with the fsspec info dict interface."""
        warnings.warn(
            "Access the fsspec info via `.as_info().copy()`",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._info.copy()

0 commit comments

Comments
 (0)