reach-vb (HF staff) committed
Commit 3962680
1 Parent(s): 2d15f4c

6d3bea4b268fcd6555b43f7754e3b8a3bbf7a5122b1e4f66294f0383b953f763

Files changed (50)
  1. lib/python3.11/site-packages/filelock-3.13.1.dist-info/INSTALLER +1 -0
  2. lib/python3.11/site-packages/filelock-3.13.1.dist-info/METADATA +56 -0
  3. lib/python3.11/site-packages/filelock-3.13.1.dist-info/RECORD +22 -0
  4. lib/python3.11/site-packages/filelock-3.13.1.dist-info/WHEEL +4 -0
  5. lib/python3.11/site-packages/filelock-3.13.1.dist-info/licenses/LICENSE +24 -0
  6. lib/python3.11/site-packages/filelock/__pycache__/_unix.cpython-311.pyc +0 -0
  7. lib/python3.11/site-packages/filelock/__pycache__/_util.cpython-311.pyc +0 -0
  8. lib/python3.11/site-packages/filelock/__pycache__/_windows.cpython-311.pyc +0 -0
  9. lib/python3.11/site-packages/filelock/__pycache__/version.cpython-311.pyc +0 -0
  10. lib/python3.11/site-packages/filelock/_api.py +323 -0
  11. lib/python3.11/site-packages/filelock/_error.py +30 -0
  12. lib/python3.11/site-packages/filelock/_soft.py +47 -0
  13. lib/python3.11/site-packages/filelock/_unix.py +65 -0
  14. lib/python3.11/site-packages/filelock/_util.py +47 -0
  15. lib/python3.11/site-packages/filelock/_windows.py +65 -0
  16. lib/python3.11/site-packages/filelock/py.typed +0 -0
  17. lib/python3.11/site-packages/filelock/version.py +16 -0
  18. lib/python3.11/site-packages/fsspec/__init__.py +69 -0
  19. lib/python3.11/site-packages/fsspec/__pycache__/__init__.cpython-311.pyc +0 -0
  20. lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc +0 -0
  21. lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc +0 -0
  22. lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc +0 -0
  23. lib/python3.11/site-packages/fsspec/__pycache__/caching.cpython-311.pyc +0 -0
  24. lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc +0 -0
  25. lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc +0 -0
  26. lib/python3.11/site-packages/fsspec/__pycache__/config.cpython-311.pyc +0 -0
  27. lib/python3.11/site-packages/fsspec/__pycache__/conftest.cpython-311.pyc +0 -0
  28. lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc +0 -0
  29. lib/python3.11/site-packages/fsspec/__pycache__/dircache.cpython-311.pyc +0 -0
  30. lib/python3.11/site-packages/fsspec/__pycache__/exceptions.cpython-311.pyc +0 -0
  31. lib/python3.11/site-packages/fsspec/__pycache__/fuse.cpython-311.pyc +0 -0
  32. lib/python3.11/site-packages/fsspec/__pycache__/generic.cpython-311.pyc +0 -0
  33. lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc +0 -0
  34. lib/python3.11/site-packages/fsspec/__pycache__/mapping.cpython-311.pyc +0 -0
  35. lib/python3.11/site-packages/fsspec/__pycache__/parquet.cpython-311.pyc +0 -0
  36. lib/python3.11/site-packages/fsspec/__pycache__/registry.cpython-311.pyc +0 -0
  37. lib/python3.11/site-packages/fsspec/__pycache__/spec.cpython-311.pyc +0 -0
  38. lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc +0 -0
  39. lib/python3.11/site-packages/fsspec/__pycache__/utils.cpython-311.pyc +0 -0
  40. lib/python3.11/site-packages/fsspec/_version.py +21 -0
  41. lib/python3.11/site-packages/fsspec/archive.py +73 -0
  42. lib/python3.11/site-packages/fsspec/asyn.py +1081 -0
  43. lib/python3.11/site-packages/fsspec/caching.py +875 -0
  44. lib/python3.11/site-packages/fsspec/callbacks.py +238 -0
  45. lib/python3.11/site-packages/fsspec/compression.py +174 -0
  46. lib/python3.11/site-packages/fsspec/config.py +131 -0
  47. lib/python3.11/site-packages/fsspec/conftest.py +55 -0
  48. lib/python3.11/site-packages/fsspec/core.py +710 -0
  49. lib/python3.11/site-packages/fsspec/dircache.py +98 -0
  50. lib/python3.11/site-packages/fsspec/exceptions.py +21 -0
lib/python3.11/site-packages/filelock-3.13.1.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
+pip
lib/python3.11/site-packages/filelock-3.13.1.dist-info/METADATA ADDED
@@ -0,0 +1,56 @@
+Metadata-Version: 2.1
+Name: filelock
+Version: 3.13.1
+Summary: A platform independent file lock.
+Project-URL: Documentation, https://py-filelock.readthedocs.io
+Project-URL: Homepage, https://github.com/tox-dev/py-filelock
+Project-URL: Source, https://github.com/tox-dev/py-filelock
+Project-URL: Tracker, https://github.com/tox-dev/py-filelock/issues
+Maintainer-email: Bernát Gábor <gaborjbernat@gmail.com>
+License-Expression: Unlicense
+License-File: LICENSE
+Keywords: application,cache,directory,log,user
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: The Unlicense (Unlicense)
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Internet
+Classifier: Topic :: Software Development :: Libraries
+Classifier: Topic :: System
+Requires-Python: >=3.8
+Provides-Extra: docs
+Requires-Dist: furo>=2023.9.10; extra == 'docs'
+Requires-Dist: sphinx-autodoc-typehints!=1.23.4,>=1.24; extra == 'docs'
+Requires-Dist: sphinx>=7.2.6; extra == 'docs'
+Provides-Extra: testing
+Requires-Dist: covdefaults>=2.3; extra == 'testing'
+Requires-Dist: coverage>=7.3.2; extra == 'testing'
+Requires-Dist: diff-cover>=8; extra == 'testing'
+Requires-Dist: pytest-cov>=4.1; extra == 'testing'
+Requires-Dist: pytest-mock>=3.12; extra == 'testing'
+Requires-Dist: pytest-timeout>=2.2; extra == 'testing'
+Requires-Dist: pytest>=7.4.3; extra == 'testing'
+Provides-Extra: typing
+Requires-Dist: typing-extensions>=4.8; python_version < '3.11' and extra == 'typing'
+Description-Content-Type: text/markdown
+
+# filelock
+
+[![PyPI](https://img.shields.io/pypi/v/filelock)](https://pypi.org/project/filelock/)
+[![Supported Python
+versions](https://img.shields.io/pypi/pyversions/filelock.svg)](https://pypi.org/project/filelock/)
+[![Documentation
+status](https://readthedocs.org/projects/py-filelock/badge/?version=latest)](https://py-filelock.readthedocs.io/en/latest/?badge=latest)
+[![Code style:
+black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![Downloads](https://static.pepy.tech/badge/filelock/month)](https://pepy.tech/project/filelock)
+[![check](https://github.com/tox-dev/py-filelock/actions/workflows/check.yml/badge.svg)](https://github.com/tox-dev/py-filelock/actions/workflows/check.yml)
+
+For more information checkout the [official documentation](https://py-filelock.readthedocs.io/en/latest/index.html).
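
The README above only links out to the documentation; as a quick orientation, basic usage of the vendored package looks like the following minimal sketch (the file names are hypothetical and not part of this commit):

    # Sketch: serialize writes to a shared file across processes with filelock.
    from filelock import FileLock

    lock = FileLock("high_ground.txt.lock")  # hypothetical lock-file path
    with lock:  # blocks until the lock is acquired, releases it on exit
        with open("high_ground.txt", "a") as f:
            f.write("You were the chosen one.")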
lib/python3.11/site-packages/filelock-3.13.1.dist-info/RECORD ADDED
@@ -0,0 +1,22 @@
+filelock-3.13.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+filelock-3.13.1.dist-info/METADATA,sha256=gi7LyG-dEuOBZC32wie-OOG0OkPZHABsn9rXvxuQlcA,2784
+filelock-3.13.1.dist-info/RECORD,,
+filelock-3.13.1.dist-info/WHEEL,sha256=9QBuHhg6FNW7lppboF2vKVbCGTVzsFykgRQjjlajrhA,87
+filelock-3.13.1.dist-info/licenses/LICENSE,sha256=iNm062BXnBkew5HKBMFhMFctfu3EqG2qWL8oxuFMm80,1210
+filelock/__init__.py,sha256=wAVZ_9_-3Y14xzzupRk5BTTRewFJekR2vf9oIx4M750,1213
+filelock/__pycache__/__init__.cpython-311.pyc,,
+filelock/__pycache__/_api.cpython-311.pyc,,
+filelock/__pycache__/_error.cpython-311.pyc,,
+filelock/__pycache__/_soft.cpython-311.pyc,,
+filelock/__pycache__/_unix.cpython-311.pyc,,
+filelock/__pycache__/_util.cpython-311.pyc,,
+filelock/__pycache__/_windows.cpython-311.pyc,,
+filelock/__pycache__/version.cpython-311.pyc,,
+filelock/_api.py,sha256=UsVWPEOOgFH1pR_6WMk2b5hWZ7nWhUPT5GZX9WuYaC8,11860
+filelock/_error.py,sha256=-5jMcjTu60YAvAO1UbqDD1GIEjVkwr8xCFwDBtMeYDg,787
+filelock/_soft.py,sha256=haqtc_TB_KJbYv2a8iuEAclKuM4fMG1vTcp28sK919c,1711
+filelock/_unix.py,sha256=ViG38PgJsIhT3xaArugvw0TPP6VWoP2VJj7FEIWypkg,2157
+filelock/_util.py,sha256=dBDlIj1dHL_juXX0Qqq6bZtyE53YZTN8GFhtyTV043o,1708
+filelock/_windows.py,sha256=eMKL8dZKrgekf5VYVGR14an29JGEInRtUO8ui9ABywg,2177
+filelock/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+filelock/version.py,sha256=fmajg3X8ZdOn-UpUewARwK5cfYf4wP4Xa0DcHjigFYo,413
lib/python3.11/site-packages/filelock-3.13.1.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.18.0
+Root-Is-Purelib: true
+Tag: py3-none-any
lib/python3.11/site-packages/filelock-3.13.1.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org>
lib/python3.11/site-packages/filelock/__pycache__/_unix.cpython-311.pyc ADDED
Binary file (3.6 kB)
lib/python3.11/site-packages/filelock/__pycache__/_util.cpython-311.pyc ADDED
Binary file (2.21 kB)
lib/python3.11/site-packages/filelock/__pycache__/_windows.cpython-311.pyc ADDED
Binary file (3.69 kB)
lib/python3.11/site-packages/filelock/__pycache__/version.cpython-311.pyc ADDED
Binary file (681 Bytes)
lib/python3.11/site-packages/filelock/_api.py ADDED
@@ -0,0 +1,323 @@
+from __future__ import annotations
+
+import contextlib
+import logging
+import os
+import time
+import warnings
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from threading import local
+from typing import TYPE_CHECKING, Any, ClassVar
+from weakref import WeakValueDictionary
+
+from ._error import Timeout
+
+if TYPE_CHECKING:
+    import sys
+    from types import TracebackType
+
+    if sys.version_info >= (3, 11):  # pragma: no cover (py311+)
+        from typing import Self
+    else:  # pragma: no cover (<py311)
+        from typing_extensions import Self
+
+
+_LOGGER = logging.getLogger("filelock")
+
+
+# This is a helper class which is returned by :meth:`BaseFileLock.acquire` and wraps the lock to make sure __enter__
+# is not called twice when entering the with statement. If we would simply return *self*, the lock would be acquired
+# again in the *__enter__* method of the BaseFileLock, but not released again automatically. issue #37 (memory leak)
+class AcquireReturnProxy:
+    """A context aware object that will release the lock file when exiting."""
+
+    def __init__(self, lock: BaseFileLock) -> None:
+        self.lock = lock
+
+    def __enter__(self) -> BaseFileLock:
+        return self.lock
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        self.lock.release()
+
+
+@dataclass
+class FileLockContext:
+    """A dataclass which holds the context for a ``BaseFileLock`` object."""
+
+    # The context is held in a separate class to allow optional use of thread local storage via the
+    # ThreadLocalFileContext class.
+
+    #: The path to the lock file.
+    lock_file: str
+
+    #: The default timeout value.
+    timeout: float
+
+    #: The mode for the lock files
+    mode: int
+
+    #: The file descriptor for the *_lock_file* as it is returned by the os.open() function, not None when lock held
+    lock_file_fd: int | None = None
+
+    #: The lock counter is used for implementing the nested locking mechanism.
+    lock_counter: int = 0  # When the lock is acquired is increased and the lock is only released, when this value is 0
+
+
+class ThreadLocalFileContext(FileLockContext, local):
+    """A thread local version of the ``FileLockContext`` class."""
+
+
+class BaseFileLock(ABC, contextlib.ContextDecorator):
+    """Abstract base class for a file lock object."""
+
+    _instances: ClassVar[WeakValueDictionary[str, BaseFileLock]] = WeakValueDictionary()
+
+    def __new__(  # noqa: PLR0913
+        cls,
+        lock_file: str | os.PathLike[str],
+        timeout: float = -1,  # noqa: ARG003
+        mode: int = 0o644,  # noqa: ARG003
+        thread_local: bool = True,  # noqa: ARG003, FBT001, FBT002
+        *,
+        is_singleton: bool = False,
+        **kwargs: dict[str, Any],  # capture remaining kwargs for subclasses  # noqa: ARG003
+    ) -> Self:
+        """Create a new lock object or if specified return the singleton instance for the lock file."""
+        if not is_singleton:
+            return super().__new__(cls)
+
+        instance = cls._instances.get(str(lock_file))
+        if not instance:
+            instance = super().__new__(cls)
+            cls._instances[str(lock_file)] = instance
+
+        return instance  # type: ignore[return-value] # https://github.com/python/mypy/issues/15322
+
+    def __init__(  # noqa: PLR0913
+        self,
+        lock_file: str | os.PathLike[str],
+        timeout: float = -1,
+        mode: int = 0o644,
+        thread_local: bool = True,  # noqa: FBT001, FBT002
+        *,
+        is_singleton: bool = False,
+    ) -> None:
+        """
+        Create a new lock object.
+
+        :param lock_file: path to the file
+        :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \
+            the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \
+            to a negative value. A timeout of 0 means, that there is exactly one attempt to acquire the file lock.
+        :param mode: file permissions for the lockfile
+        :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \
+            ``False`` then the lock will be reentrant across threads.
+        :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \
+            per lock file. This is useful if you want to use the lock object for reentrant locking without needing \
+            to pass the same object around.
+        """
+        self._is_thread_local = thread_local
+        self._is_singleton = is_singleton
+
+        # Create the context. Note that external code should not work with the context directly and should instead use
+        # properties of this class.
+        kwargs: dict[str, Any] = {
+            "lock_file": os.fspath(lock_file),
+            "timeout": timeout,
+            "mode": mode,
+        }
+        self._context: FileLockContext = (ThreadLocalFileContext if thread_local else FileLockContext)(**kwargs)
+
+    def is_thread_local(self) -> bool:
+        """:return: a flag indicating if this lock is thread local or not"""
+        return self._is_thread_local
+
+    @property
+    def is_singleton(self) -> bool:
+        """:return: a flag indicating if this lock is singleton or not"""
+        return self._is_singleton
+
+    @property
+    def lock_file(self) -> str:
+        """:return: path to the lock file"""
+        return self._context.lock_file
+
+    @property
+    def timeout(self) -> float:
+        """
+        :return: the default timeout value, in seconds
+
+        .. versionadded:: 2.0.0
+        """
+        return self._context.timeout
+
+    @timeout.setter
+    def timeout(self, value: float | str) -> None:
+        """
+        Change the default timeout value.
+
+        :param value: the new value, in seconds
+        """
+        self._context.timeout = float(value)
+
+    @abstractmethod
+    def _acquire(self) -> None:
+        """If the file lock could be acquired, self._context.lock_file_fd holds the file descriptor of the lock file."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def _release(self) -> None:
+        """Releases the lock and sets self._context.lock_file_fd to None."""
+        raise NotImplementedError
+
+    @property
+    def is_locked(self) -> bool:
+        """
+
+        :return: A boolean indicating if the lock file is holding the lock currently.
+
+        .. versionchanged:: 2.0.0
+
+            This was previously a method and is now a property.
+        """
+        return self._context.lock_file_fd is not None
+
+    @property
+    def lock_counter(self) -> int:
+        """:return: The number of times this lock has been acquired (but not yet released)."""
+        return self._context.lock_counter
+
+    def acquire(
+        self,
+        timeout: float | None = None,
+        poll_interval: float = 0.05,
+        *,
+        poll_intervall: float | None = None,
+        blocking: bool = True,
+    ) -> AcquireReturnProxy:
+        """
+        Try to acquire the file lock.
+
+        :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default :attr:`~timeout` is and
+            if ``timeout < 0``, there is no timeout and this method will block until the lock could be acquired
+        :param poll_interval: interval of trying to acquire the lock file
+        :param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead
+        :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the
+            first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired.
+        :raises Timeout: if fails to acquire lock within the timeout period
+        :return: a context object that will unlock the file when the context is exited
+
+        .. code-block:: python
+
+            # You can use this method in the context manager (recommended)
+            with lock.acquire():
+                pass
+
+            # Or use an equivalent try-finally construct:
+            lock.acquire()
+            try:
+                pass
+            finally:
+                lock.release()
+
+        .. versionchanged:: 2.0.0
+
+            This method returns now a *proxy* object instead of *self*,
+            so that it can be used in a with statement without side effects.
+
+        """
+        # Use the default timeout, if no timeout is provided.
+        if timeout is None:
+            timeout = self._context.timeout
+
+        if poll_intervall is not None:
+            msg = "use poll_interval instead of poll_intervall"
+            warnings.warn(msg, DeprecationWarning, stacklevel=2)
+            poll_interval = poll_intervall
+
+        # Increment the number right at the beginning. We can still undo it, if something fails.
+        self._context.lock_counter += 1
+
+        lock_id = id(self)
+        lock_filename = self.lock_file
+        start_time = time.perf_counter()
+        try:
+            while True:
+                if not self.is_locked:
+                    _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
+                    self._acquire()
+                if self.is_locked:
+                    _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
+                    break
+                if blocking is False:
+                    _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
+                    raise Timeout(lock_filename)  # noqa: TRY301
+                if 0 <= timeout < time.perf_counter() - start_time:
+                    _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
+                    raise Timeout(lock_filename)  # noqa: TRY301
+                msg = "Lock %s not acquired on %s, waiting %s seconds ..."
+                _LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
+                time.sleep(poll_interval)
+        except BaseException:  # Something did go wrong, so decrement the counter.
+            self._context.lock_counter = max(0, self._context.lock_counter - 1)
+            raise
+        return AcquireReturnProxy(lock=self)
+
+    def release(self, force: bool = False) -> None:  # noqa: FBT001, FBT002
+        """
+        Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. Also
+        note, that the lock file itself is not automatically deleted.
+
+        :param force: If true, the lock counter is ignored and the lock is released in every case/
+        """
+        if self.is_locked:
+            self._context.lock_counter -= 1
+
+            if self._context.lock_counter == 0 or force:
+                lock_id, lock_filename = id(self), self.lock_file
+
+                _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
+                self._release()
+                self._context.lock_counter = 0
+                _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)
+
+    def __enter__(self) -> Self:
+        """
+        Acquire the lock.
+
+        :return: the lock object
+        """
+        self.acquire()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        """
+        Release the lock.
+
+        :param exc_type: the exception type if raised
+        :param exc_value: the exception value if raised
+        :param traceback: the exception traceback if raised
+        """
+        self.release()
+
+    def __del__(self) -> None:
+        """Called when the lock object is deleted."""
+        self.release(force=True)
+
+
+__all__ = [
+    "BaseFileLock",
+    "AcquireReturnProxy",
+]
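
The acquire()/release() pair above implements a counted, re-entrant lock with an optional timeout. A minimal sketch of how that surface is used (paths and timeout values are illustrative, not part of this commit):

    # Sketch: timeout handling and re-entrant acquisition with a BaseFileLock subclass.
    from filelock import FileLock, Timeout

    lock = FileLock("/tmp/demo.lock", timeout=1)  # hypothetical path; 1 s default timeout
    try:
        with lock.acquire(timeout=0.5):  # returns an AcquireReturnProxy
            ...  # lock.lock_counter == 1 while held
    except Timeout:
        print("another process holds /tmp/demo.lock")

    # Nested acquisition on the same object is counted: release() only unlocks
    # the file once lock_counter drops back to 0.
    with lock, lock:
        assert lock.is_locked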
lib/python3.11/site-packages/filelock/_error.py ADDED
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+class Timeout(TimeoutError):  # noqa: N818
+    """Raised when the lock could not be acquired in *timeout* seconds."""
+
+    def __init__(self, lock_file: str) -> None:
+        super().__init__()
+        self._lock_file = lock_file
+
+    def __reduce__(self) -> str | tuple[Any, ...]:
+        return self.__class__, (self._lock_file,)  # Properly pickle the exception
+
+    def __str__(self) -> str:
+        return f"The file lock '{self._lock_file}' could not be acquired."
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({self.lock_file!r})"
+
+    @property
+    def lock_file(self) -> str:
+        """:return: The path of the file lock."""
+        return self._lock_file
+
+
+__all__ = [
+    "Timeout",
+]
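
Because __reduce__ is defined, the Timeout exception round-trips through pickle and keeps its lock_file attribute, which matters when it is raised inside worker processes. A small sketch (the path is hypothetical):

    # Sketch: Timeout carries the lock path and survives pickling.
    import pickle

    from filelock import Timeout

    err = Timeout("/tmp/demo.lock")  # hypothetical path
    print(err)            # The file lock '/tmp/demo.lock' could not be acquired.
    print(err.lock_file)  # /tmp/demo.lock
    assert pickle.loads(pickle.dumps(err)).lock_file == err.lock_file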
lib/python3.11/site-packages/filelock/_soft.py ADDED
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import os
+import sys
+from contextlib import suppress
+from errno import EACCES, EEXIST
+from pathlib import Path
+
+from ._api import BaseFileLock
+from ._util import ensure_directory_exists, raise_on_not_writable_file
+
+
+class SoftFileLock(BaseFileLock):
+    """Simply watches the existence of the lock file."""
+
+    def _acquire(self) -> None:
+        raise_on_not_writable_file(self.lock_file)
+        ensure_directory_exists(self.lock_file)
+        # first check for exists and read-only mode as the open will mask this case as EEXIST
+        flags = (
+            os.O_WRONLY  # open for writing only
+            | os.O_CREAT
+            | os.O_EXCL  # together with above raise EEXIST if the file specified by filename exists
+            | os.O_TRUNC  # truncate the file to zero byte
+        )
+        try:
+            file_handler = os.open(self.lock_file, flags, self._context.mode)
+        except OSError as exception:  # re-raise unless expected exception
+            if not (
+                exception.errno == EEXIST  # lock already exist
+                or (exception.errno == EACCES and sys.platform == "win32")  # has no access to this lock
+            ):  # pragma: win32 no cover
+                raise
+        else:
+            self._context.lock_file_fd = file_handler
+
+    def _release(self) -> None:
+        assert self._context.lock_file_fd is not None  # noqa: S101
+        os.close(self._context.lock_file_fd)  # the lock file is definitely not None
+        self._context.lock_file_fd = None
+        with suppress(OSError):  # the file is already deleted and that's what we want
+            Path(self.lock_file).unlink()
+
+
+__all__ = [
+    "SoftFileLock",
+]
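
SoftFileLock relies only on the existence of the lock file (O_CREAT | O_EXCL), so it also works on filesystems that do not support flock or msvcrt locking. A minimal usage sketch (the path is hypothetical):

    # Sketch: existence-based locking; the lock file is removed again on release.
    from filelock import SoftFileLock

    lock = SoftFileLock("shared/resource.lock")  # hypothetical path; parent dirs are created on acquire
    with lock:
        ...  # guarded only by the existence of resource.lock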
lib/python3.11/site-packages/filelock/_unix.py ADDED
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import os
+import sys
+from contextlib import suppress
+from errno import ENOSYS
+from typing import cast
+
+from ._api import BaseFileLock
+from ._util import ensure_directory_exists
+
+#: a flag to indicate if the fcntl API is available
+has_fcntl = False
+if sys.platform == "win32":  # pragma: win32 cover
+
+    class UnixFileLock(BaseFileLock):
+        """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
+
+        def _acquire(self) -> None:
+            raise NotImplementedError
+
+        def _release(self) -> None:
+            raise NotImplementedError
+
+else:  # pragma: win32 no cover
+    try:
+        import fcntl
+    except ImportError:
+        pass
+    else:
+        has_fcntl = True
+
+    class UnixFileLock(BaseFileLock):
+        """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
+
+        def _acquire(self) -> None:
+            ensure_directory_exists(self.lock_file)
+            open_flags = os.O_RDWR | os.O_CREAT | os.O_TRUNC
+            fd = os.open(self.lock_file, open_flags, self._context.mode)
+            with suppress(PermissionError):  # This locked is not owned by this UID
+                os.fchmod(fd, self._context.mode)
+            try:
+                fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+            except OSError as exception:
+                os.close(fd)
+                if exception.errno == ENOSYS:  # NotImplemented error
+                    msg = "FileSystem does not appear to support flock; user SoftFileLock instead"
+                    raise NotImplementedError(msg) from exception
+            else:
+                self._context.lock_file_fd = fd
+
+        def _release(self) -> None:
+            # Do not remove the lockfile:
+            # https://github.com/tox-dev/py-filelock/issues/31
+            # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
+            fd = cast(int, self._context.lock_file_fd)
+            self._context.lock_file_fd = None
+            fcntl.flock(fd, fcntl.LOCK_UN)
+            os.close(fd)
+
+
+__all__ = [
+    "has_fcntl",
+    "UnixFileLock",
+]
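
The module publishes has_fcntl so callers can tell whether flock-based locking is actually available; when it is not (as the NotImplementedError above suggests), SoftFileLock is the usual fallback. A sketch of that check, assuming a POSIX host and importing the private module purely for illustration:

    # Sketch: choose a hard flock-based lock when fcntl exists, else fall back.
    from filelock import SoftFileLock
    from filelock._unix import UnixFileLock, has_fcntl  # private module, illustration only

    LockClass = UnixFileLock if has_fcntl else SoftFileLock
    with LockClass("work.lock"):  # hypothetical path
        ...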
lib/python3.11/site-packages/filelock/_util.py ADDED
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import os
+import stat
+import sys
+from errno import EACCES, EISDIR
+from pathlib import Path
+
+
+def raise_on_not_writable_file(filename: str) -> None:
+    """
+    Raise an exception if attempting to open the file for writing would fail.
+    This is done so files that will never be writable can be separated from
+    files that are writable but currently locked
+    :param filename: file to check
+    :raises OSError: as if the file was opened for writing.
+    """
+    try:  # use stat to do exists + can write to check without race condition
+        file_stat = os.stat(filename)  # noqa: PTH116
+    except OSError:
+        return  # swallow does not exist or other errors
+
+    if file_stat.st_mtime != 0:  # if os.stat returns but modification is zero that's an invalid os.stat - ignore it
+        if not (file_stat.st_mode & stat.S_IWUSR):
+            raise PermissionError(EACCES, "Permission denied", filename)
+
+        if stat.S_ISDIR(file_stat.st_mode):
+            if sys.platform == "win32":  # pragma: win32 cover
+                # On Windows, this is PermissionError
+                raise PermissionError(EACCES, "Permission denied", filename)
+            else:  # pragma: win32 no cover # noqa: RET506
+                # On linux / macOS, this is IsADirectoryError
+                raise IsADirectoryError(EISDIR, "Is a directory", filename)
+
+
+def ensure_directory_exists(filename: Path | str) -> None:
+    """
+    Ensure the directory containing the file exists (create it if necessary)
+    :param filename: file.
+    """
+    Path(filename).parent.mkdir(parents=True, exist_ok=True)
+
+
+__all__ = [
+    "raise_on_not_writable_file",
+    "ensure_directory_exists",
+]
lib/python3.11/site-packages/filelock/_windows.py ADDED
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import os
+import sys
+from contextlib import suppress
+from errno import EACCES
+from pathlib import Path
+from typing import cast
+
+from ._api import BaseFileLock
+from ._util import ensure_directory_exists, raise_on_not_writable_file
+
+if sys.platform == "win32":  # pragma: win32 cover
+    import msvcrt
+
+    class WindowsFileLock(BaseFileLock):
+        """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems."""
+
+        def _acquire(self) -> None:
+            raise_on_not_writable_file(self.lock_file)
+            ensure_directory_exists(self.lock_file)
+            flags = (
+                os.O_RDWR  # open for read and write
+                | os.O_CREAT  # create file if not exists
+                | os.O_TRUNC  # truncate file if not empty
+            )
+            try:
+                fd = os.open(self.lock_file, flags, self._context.mode)
+            except OSError as exception:
+                if exception.errno != EACCES:  # has no access to this lock
+                    raise
+            else:
+                try:
+                    msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)
+                except OSError as exception:
+                    os.close(fd)  # close file first
+                    if exception.errno != EACCES:  # file is already locked
+                        raise
+                else:
+                    self._context.lock_file_fd = fd
+
+        def _release(self) -> None:
+            fd = cast(int, self._context.lock_file_fd)
+            self._context.lock_file_fd = None
+            msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
+            os.close(fd)
+
+            with suppress(OSError):  # Probably another instance of the application hat acquired the file lock.
+                Path(self.lock_file).unlink()
+
+else:  # pragma: win32 no cover
+
+    class WindowsFileLock(BaseFileLock):
+        """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems."""
+
+        def _acquire(self) -> None:
+            raise NotImplementedError
+
+        def _release(self) -> None:
+            raise NotImplementedError
+
+
+__all__ = [
+    "WindowsFileLock",
+]
lib/python3.11/site-packages/filelock/py.typed ADDED
File without changes
lib/python3.11/site-packages/filelock/version.py ADDED
@@ -0,0 +1,16 @@
+# file generated by setuptools_scm
+# don't change, don't track in version control
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple, Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+
+__version__ = version = '3.13.1'
+__version_tuple__ = version_tuple = (3, 13, 1)
lib/python3.11/site-packages/fsspec/__init__.py ADDED
@@ -0,0 +1,69 @@
+from importlib.metadata import entry_points
+
+from . import _version, caching
+from .callbacks import Callback
+from .compression import available_compressions
+from .core import get_fs_token_paths, open, open_files, open_local
+from .exceptions import FSTimeoutError
+from .mapping import FSMap, get_mapper
+from .registry import (
+    available_protocols,
+    filesystem,
+    get_filesystem_class,
+    register_implementation,
+    registry,
+)
+from .spec import AbstractFileSystem
+
+__version__ = _version.get_versions()["version"]
+
+__all__ = [
+    "AbstractFileSystem",
+    "FSTimeoutError",
+    "FSMap",
+    "filesystem",
+    "register_implementation",
+    "get_filesystem_class",
+    "get_fs_token_paths",
+    "get_mapper",
+    "open",
+    "open_files",
+    "open_local",
+    "registry",
+    "caching",
+    "Callback",
+    "available_protocols",
+    "available_compressions",
+]
+
+
+def process_entries():
+    if entry_points is not None:
+        try:
+            eps = entry_points()
+        except TypeError:
+            pass  # importlib-metadata < 0.8
+        else:
+            if hasattr(eps, "select"):  # Python 3.10+ / importlib_metadata >= 3.9.0
+                specs = eps.select(group="fsspec.specs")
+            else:
+                specs = eps.get("fsspec.specs", [])
+            registered_names = {}
+            for spec in specs:
+                err_msg = f"Unable to load filesystem from {spec}"
+                name = spec.name
+                if name in registered_names:
+                    continue
+                registered_names[name] = True
+                register_implementation(
+                    name,
+                    spec.value.replace(":", "."),
+                    errtxt=err_msg,
+                    # We take our implementations as the ones to overload with if
+                    # for some reason we encounter some, may be the same, already
+                    # registered
+                    clobber=True,
+                )
+
+
+process_entries()
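
The names re-exported here (filesystem, open, get_mapper, ...) are the package's main entry points. A minimal sketch using the built-in in-memory backend so nothing touches disk (paths are illustrative):

    # Sketch: look up a backend by protocol and read it back through fsspec.open().
    import fsspec

    fs = fsspec.filesystem("memory")  # registry lookup by protocol name
    with fs.open("/demo/hello.txt", "wb") as f:
        f.write(b"hello fsspec")
    print(fs.ls("/demo"))

    with fsspec.open("memory://demo/hello.txt", "rb") as f:  # same registry, URL form
        print(f.read())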
lib/python3.11/site-packages/fsspec/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (2.26 kB)
lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc ADDED
Binary file (630 Bytes)
lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc ADDED
Binary file (4.81 kB)
lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc ADDED
Binary file (51.9 kB)
lib/python3.11/site-packages/fsspec/__pycache__/caching.cpython-311.pyc ADDED
Binary file (37.6 kB)
lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc ADDED
Binary file (10 kB)
lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc ADDED
Binary file (8.07 kB)
lib/python3.11/site-packages/fsspec/__pycache__/config.cpython-311.pyc ADDED
Binary file (6.71 kB)
lib/python3.11/site-packages/fsspec/__pycache__/conftest.cpython-311.pyc ADDED
Binary file (3.44 kB)
lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc ADDED
Binary file (30.7 kB)
lib/python3.11/site-packages/fsspec/__pycache__/dircache.cpython-311.pyc ADDED
Binary file (4.77 kB)
lib/python3.11/site-packages/fsspec/__pycache__/exceptions.cpython-311.pyc ADDED
Binary file (991 Bytes)
lib/python3.11/site-packages/fsspec/__pycache__/fuse.cpython-311.pyc ADDED
Binary file (17.1 kB)
lib/python3.11/site-packages/fsspec/__pycache__/generic.cpython-311.pyc ADDED
Binary file (21.7 kB)
lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc ADDED
Binary file (23.3 kB)
lib/python3.11/site-packages/fsspec/__pycache__/mapping.cpython-311.pyc ADDED
Binary file (13.6 kB)
lib/python3.11/site-packages/fsspec/__pycache__/parquet.cpython-311.pyc ADDED
Binary file (17.8 kB)
lib/python3.11/site-packages/fsspec/__pycache__/registry.cpython-311.pyc ADDED
Binary file (11.3 kB)
lib/python3.11/site-packages/fsspec/__pycache__/spec.cpython-311.pyc ADDED
Binary file (88.5 kB)
lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc ADDED
Binary file (5.01 kB)
lib/python3.11/site-packages/fsspec/__pycache__/utils.cpython-311.pyc ADDED
Binary file (32.1 kB)
lib/python3.11/site-packages/fsspec/_version.py ADDED
@@ -0,0 +1,21 @@
+
+# This file was generated by 'versioneer.py' (0.29) from
+# revision-control system data, or from the parent directory name of an
+# unpacked source archive. Distribution tarballs contain a pre-generated copy
+# of this file.
+
+import json
+
+version_json = '''
+{
+ "date": "2023-12-11T16:18:48-0500",
+ "dirty": false,
+ "error": null,
+ "full-revisionid": "dd8cb9bf620be4d9153e854dd1431c23a2be6db0",
+ "version": "2023.12.2"
+}
+'''  # END VERSION_JSON
+
+
+def get_versions():
+    return json.loads(version_json)
lib/python3.11/site-packages/fsspec/archive.py ADDED
@@ -0,0 +1,73 @@
+from fsspec import AbstractFileSystem
+from fsspec.utils import tokenize
+
+
+class AbstractArchiveFileSystem(AbstractFileSystem):
+    """
+    A generic superclass for implementing Archive-based filesystems.
+
+    Currently, it is shared amongst
+    :class:`~fsspec.implementations.zip.ZipFileSystem`,
+    :class:`~fsspec.implementations.libarchive.LibArchiveFileSystem` and
+    :class:`~fsspec.implementations.tar.TarFileSystem`.
+    """
+
+    def __str__(self):
+        return f"<Archive-like object {type(self).__name__} at {id(self)}>"
+
+    __repr__ = __str__
+
+    def ukey(self, path):
+        return tokenize(path, self.fo, self.protocol)
+
+    def _all_dirnames(self, paths):
+        """Returns *all* directory names for each path in paths, including intermediate
+        ones.
+
+        Parameters
+        ----------
+        paths: Iterable of path strings
+        """
+        if len(paths) == 0:
+            return set()
+
+        dirnames = {self._parent(path) for path in paths} - {self.root_marker}
+        return dirnames | self._all_dirnames(dirnames)
+
+    def info(self, path, **kwargs):
+        self._get_dirs()
+        path = self._strip_protocol(path)
+        if path in {"", "/"} and self.dir_cache:
+            return {"name": "", "type": "directory", "size": 0}
+        if path in self.dir_cache:
+            return self.dir_cache[path]
+        elif path + "/" in self.dir_cache:
+            return self.dir_cache[path + "/"]
+        else:
+            raise FileNotFoundError(path)
+
+    def ls(self, path, detail=True, **kwargs):
+        self._get_dirs()
+        paths = {}
+        for p, f in self.dir_cache.items():
+            p = p.rstrip("/")
+            if "/" in p:
+                root = p.rsplit("/", 1)[0]
+            else:
+                root = ""
+            if root == path.rstrip("/"):
+                paths[p] = f
+            elif all(
+                (a == b)
+                for a, b in zip(path.split("/"), [""] + p.strip("/").split("/"))
+            ):
+                # root directory entry
+                ppath = p.rstrip("/").split("/", 1)[0]
+                if ppath not in paths:
+                    out = {"name": ppath, "size": 0, "type": "directory"}
+                    paths[ppath] = out
+        if detail:
+            out = sorted(paths.values(), key=lambda _: _["name"])
+            return out
+        else:
+            return sorted(paths)
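
Concrete subclasses such as the zip implementation populate dir_cache once and answer info()/ls() from it, as the methods above show. A small sketch of that behaviour through the public API (the archive name is hypothetical):

    # Sketch: ZipFileSystem is one AbstractArchiveFileSystem subclass.
    import zipfile

    import fsspec

    with zipfile.ZipFile("example.zip", "w") as z:  # hypothetical archive
        z.writestr("data/a.txt", "hello")

    fs = fsspec.filesystem("zip", fo="example.zip")
    print(fs.ls("data"))          # served from the cached directory listing
    print(fs.cat("data/a.txt"))   # b'hello'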
lib/python3.11/site-packages/fsspec/asyn.py ADDED
@@ -0,0 +1,1081 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import asyncio.events
3
+ import functools
4
+ import inspect
5
+ import io
6
+ import numbers
7
+ import os
8
+ import re
9
+ import threading
10
+ from contextlib import contextmanager
11
+ from glob import has_magic
12
+ from typing import TYPE_CHECKING, Iterable
13
+
14
+ from .callbacks import _DEFAULT_CALLBACK
15
+ from .exceptions import FSTimeoutError
16
+ from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
17
+ from .spec import AbstractBufferedFile, AbstractFileSystem
18
+ from .utils import glob_translate, is_exception, other_paths
19
+
20
+ private = re.compile("_[^_]")
21
+ iothread = [None] # dedicated fsspec IO thread
22
+ loop = [None] # global event loop for any non-async instance
23
+ _lock = None # global lock placeholder
24
+ get_running_loop = asyncio.get_running_loop
25
+
26
+
27
+ def get_lock():
28
+ """Allocate or return a threading lock.
29
+
30
+ The lock is allocated on first use to allow setting one lock per forked process.
31
+ """
32
+ global _lock
33
+ if not _lock:
34
+ _lock = threading.Lock()
35
+ return _lock
36
+
37
+
38
+ def reset_lock():
39
+ """Reset the global lock.
40
+
41
+ This should be called only on the init of a forked process to reset the lock to
42
+ None, enabling the new forked process to get a new lock.
43
+ """
44
+ global _lock
45
+
46
+ iothread[0] = None
47
+ loop[0] = None
48
+ _lock = None
49
+
50
+
51
+ async def _runner(event, coro, result, timeout=None):
52
+ timeout = timeout if timeout else None # convert 0 or 0.0 to None
53
+ if timeout is not None:
54
+ coro = asyncio.wait_for(coro, timeout=timeout)
55
+ try:
56
+ result[0] = await coro
57
+ except Exception as ex:
58
+ result[0] = ex
59
+ finally:
60
+ event.set()
61
+
62
+
63
+ def sync(loop, func, *args, timeout=None, **kwargs):
64
+ """
65
+ Make loop run coroutine until it returns. Runs in other thread
66
+
67
+ Examples
68
+ --------
69
+ >>> fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args,
70
+ timeout=timeout, **kwargs)
71
+ """
72
+ timeout = timeout if timeout else None # convert 0 or 0.0 to None
73
+ # NB: if the loop is not running *yet*, it is OK to submit work
74
+ # and we will wait for it
75
+ if loop is None or loop.is_closed():
76
+ raise RuntimeError("Loop is not running")
77
+ try:
78
+ loop0 = asyncio.events.get_running_loop()
79
+ if loop0 is loop:
80
+ raise NotImplementedError("Calling sync() from within a running loop")
81
+ except NotImplementedError:
82
+ raise
83
+ except RuntimeError:
84
+ pass
85
+ coro = func(*args, **kwargs)
86
+ result = [None]
87
+ event = threading.Event()
88
+ asyncio.run_coroutine_threadsafe(_runner(event, coro, result, timeout), loop)
89
+ while True:
90
+ # this loops allows thread to get interrupted
91
+ if event.wait(1):
92
+ break
93
+ if timeout is not None:
94
+ timeout -= 1
95
+ if timeout < 0:
96
+ raise FSTimeoutError
97
+
98
+ return_result = result[0]
99
+ if isinstance(return_result, asyncio.TimeoutError):
100
+ # suppress asyncio.TimeoutError, raise FSTimeoutError
101
+ raise FSTimeoutError from return_result
102
+ elif isinstance(return_result, BaseException):
103
+ raise return_result
104
+ else:
105
+ return return_result
106
+
107
+
108
+ def sync_wrapper(func, obj=None):
109
+ """Given a function, make so can be called in blocking contexts
110
+
111
+ Leave obj=None if defining within a class. Pass the instance if attaching
112
+ as an attribute of the instance.
113
+ """
114
+
115
+ @functools.wraps(func)
116
+ def wrapper(*args, **kwargs):
117
+ self = obj or args[0]
118
+ return sync(self.loop, func, *args, **kwargs)
119
+
120
+ return wrapper
121
+
122
+
123
+ @contextmanager
124
+ def _selector_policy():
125
+ original_policy = asyncio.get_event_loop_policy()
126
+ try:
127
+ if os.name == "nt" and hasattr(asyncio, "WindowsSelectorEventLoopPolicy"):
128
+ asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
129
+
130
+ yield
131
+ finally:
132
+ asyncio.set_event_loop_policy(original_policy)
133
+
134
+
135
+ def get_loop():
136
+ """Create or return the default fsspec IO loop
137
+
138
+ The loop will be running on a separate thread.
139
+ """
140
+ if loop[0] is None:
141
+ with get_lock():
142
+ # repeat the check just in case the loop got filled between the
143
+ # previous two calls from another thread
144
+ if loop[0] is None:
145
+ with _selector_policy():
146
+ loop[0] = asyncio.new_event_loop()
147
+ th = threading.Thread(target=loop[0].run_forever, name="fsspecIO")
148
+ th.daemon = True
149
+ th.start()
150
+ iothread[0] = th
151
+ return loop[0]
152
+
153
+
154
+ if TYPE_CHECKING:
155
+ import resource
156
+
157
+ ResourceError = resource.error
158
+ else:
159
+ try:
160
+ import resource
161
+ except ImportError:
162
+ resource = None
163
+ ResourceError = OSError
164
+ else:
165
+ ResourceError = getattr(resource, "error", OSError)
166
+
167
+ _DEFAULT_BATCH_SIZE = 128
168
+ _NOFILES_DEFAULT_BATCH_SIZE = 1280
169
+
170
+
171
+ def _get_batch_size(nofiles=False):
172
+ from fsspec.config import conf
173
+
174
+ if nofiles:
175
+ if "nofiles_gather_batch_size" in conf:
176
+ return conf["nofiles_gather_batch_size"]
177
+ else:
178
+ if "gather_batch_size" in conf:
179
+ return conf["gather_batch_size"]
180
+ if nofiles:
181
+ return _NOFILES_DEFAULT_BATCH_SIZE
182
+ if resource is None:
183
+ return _DEFAULT_BATCH_SIZE
184
+
185
+ try:
186
+ soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
187
+ except (ImportError, ValueError, ResourceError):
188
+ return _DEFAULT_BATCH_SIZE
189
+
190
+ if soft_limit == resource.RLIM_INFINITY:
191
+ return -1
192
+ else:
193
+ return soft_limit // 8
194
+
195
+
196
+ def running_async() -> bool:
197
+ """Being executed by an event loop?"""
198
+ try:
199
+ asyncio.get_running_loop()
200
+ return True
201
+ except RuntimeError:
202
+ return False
203
+
204
+
205
+ async def _run_coros_in_chunks(
206
+ coros,
207
+ batch_size=None,
208
+ callback=_DEFAULT_CALLBACK,
209
+ timeout=None,
210
+ return_exceptions=False,
211
+ nofiles=False,
212
+ ):
213
+ """Run the given coroutines in chunks.
214
+
215
+ Parameters
216
+ ----------
217
+ coros: list of coroutines to run
218
+ batch_size: int or None
219
+ Number of coroutines to submit/wait on simultaneously.
220
+ If -1, then it will not be any throttling. If
221
+ None, it will be inferred from _get_batch_size()
222
+ callback: fsspec.callbacks.Callback instance
223
+ Gets a relative_update when each coroutine completes
224
+ timeout: number or None
225
+ If given, each coroutine times out after this time. Note that, since
226
+ there are multiple batches, the total run time of this function will in
227
+ general be longer
228
+ return_exceptions: bool
229
+ Same meaning as in asyncio.gather
230
+ nofiles: bool
231
+ If inferring the batch_size, does this operation involve local files?
232
+ If yes, you normally expect smaller batches.
233
+ """
234
+
235
+ if batch_size is None:
236
+ batch_size = _get_batch_size(nofiles=nofiles)
237
+
238
+ if batch_size == -1:
239
+ batch_size = len(coros)
240
+
241
+ assert batch_size > 0
242
+ results = []
243
+ for start in range(0, len(coros), batch_size):
244
+ chunk = [
245
+ asyncio.Task(asyncio.wait_for(c, timeout=timeout))
246
+ for c in coros[start : start + batch_size]
247
+ ]
248
+ if callback is not _DEFAULT_CALLBACK:
249
+ [
250
+ t.add_done_callback(lambda *_, **__: callback.relative_update(1))
251
+ for t in chunk
252
+ ]
253
+ results.extend(
254
+ await asyncio.gather(*chunk, return_exceptions=return_exceptions),
255
+ )
256
+ return results
257
+
258
+
259
+ # these methods should be implemented as async by any async-able backend
260
+ async_methods = [
261
+ "_ls",
262
+ "_cat_file",
263
+ "_get_file",
264
+ "_put_file",
265
+ "_rm_file",
266
+ "_cp_file",
267
+ "_pipe_file",
268
+ "_expand_path",
269
+ "_info",
270
+ "_isfile",
271
+ "_isdir",
272
+ "_exists",
273
+ "_walk",
274
+ "_glob",
275
+ "_find",
276
+ "_du",
277
+ "_size",
278
+ "_mkdir",
279
+ "_makedirs",
280
+ ]
281
+
282
+
283
+ class AsyncFileSystem(AbstractFileSystem):
284
+ """Async file operations, default implementations
285
+
286
+ Passes bulk operations to asyncio.gather for concurrent operation.
287
+
288
+ Implementations that have concurrent batch operations and/or async methods
289
+ should inherit from this class instead of AbstractFileSystem. Docstrings are
290
+ copied from the un-underscored method in AbstractFileSystem, if not given.
291
+ """
292
+
293
+ # note that methods do not have docstring here; they will be copied
294
+ # for _* methods and inferred for overridden methods.
295
+
296
+ async_impl = True
297
+ mirror_sync_methods = True
298
+ disable_throttling = False
299
+
300
+ def __init__(self, *args, asynchronous=False, loop=None, batch_size=None, **kwargs):
301
+ self.asynchronous = asynchronous
302
+ self._pid = os.getpid()
303
+ if not asynchronous:
304
+ self._loop = loop or get_loop()
305
+ else:
306
+ self._loop = None
307
+ self.batch_size = batch_size
308
+ super().__init__(*args, **kwargs)
309
+
310
+ @property
311
+ def loop(self):
312
+ if self._pid != os.getpid():
313
+ raise RuntimeError("This class is not fork-safe")
314
+ return self._loop
315
+
316
+ async def _rm_file(self, path, **kwargs):
317
+ raise NotImplementedError
318
+
319
+ async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
320
+ # TODO: implement on_error
321
+ batch_size = batch_size or self.batch_size
322
+ path = await self._expand_path(path, recursive=recursive)
323
+ return await _run_coros_in_chunks(
324
+ [self._rm_file(p, **kwargs) for p in reversed(path)],
325
+ batch_size=batch_size,
326
+ nofiles=True,
327
+ )
328
+
329
+ async def _cp_file(self, path1, path2, **kwargs):
330
+ raise NotImplementedError
331
+
332
+ async def _copy(
333
+ self,
334
+ path1,
335
+ path2,
336
+ recursive=False,
337
+ on_error=None,
338
+ maxdepth=None,
339
+ batch_size=None,
340
+ **kwargs,
341
+ ):
342
+ if on_error is None and recursive:
343
+ on_error = "ignore"
344
+ elif on_error is None:
345
+ on_error = "raise"
346
+
347
+ if isinstance(path1, list) and isinstance(path2, list):
348
+ # No need to expand paths when both source and destination
349
+ # are provided as lists
350
+ paths1 = path1
351
+ paths2 = path2
352
+ else:
353
+ source_is_str = isinstance(path1, str)
354
+ paths1 = await self._expand_path(
355
+ path1, maxdepth=maxdepth, recursive=recursive
356
+ )
357
+ if source_is_str and (not recursive or maxdepth is not None):
358
+ # Non-recursive glob does not copy directories
359
+ paths1 = [
360
+ p for p in paths1 if not (trailing_sep(p) or await self._isdir(p))
361
+ ]
362
+ if not paths1:
363
+ return
364
+
365
+ source_is_file = len(paths1) == 1
366
+ dest_is_dir = isinstance(path2, str) and (
367
+ trailing_sep(path2) or await self._isdir(path2)
368
+ )
369
+
370
+ exists = source_is_str and (
371
+ (has_magic(path1) and source_is_file)
372
+ or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
373
+ )
374
+ paths2 = other_paths(
375
+ paths1,
376
+ path2,
377
+ exists=exists,
378
+ flatten=not source_is_str,
379
+ )
380
+
381
+ batch_size = batch_size or self.batch_size
382
+ coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)]
383
+ result = await _run_coros_in_chunks(
384
+ coros, batch_size=batch_size, return_exceptions=True, nofiles=True
385
+ )
386
+
387
+ for ex in filter(is_exception, result):
388
+ if on_error == "ignore" and isinstance(ex, FileNotFoundError):
389
+ continue
390
+ raise ex
391
+
392
+ async def _pipe_file(self, path, value, **kwargs):
393
+ raise NotImplementedError
394
+
395
+ async def _pipe(self, path, value=None, batch_size=None, **kwargs):
396
+ if isinstance(path, str):
397
+ path = {path: value}
398
+ batch_size = batch_size or self.batch_size
399
+ return await _run_coros_in_chunks(
400
+ [self._pipe_file(k, v, **kwargs) for k, v in path.items()],
401
+ batch_size=batch_size,
402
+ nofiles=True,
403
+ )
404
+
405
+ async def _process_limits(self, url, start, end):
406
+ """Helper for "Range"-based _cat_file"""
407
+ size = None
408
+ suff = False
409
+ if start is not None and start < 0:
410
+ # if start is negative and end None, end is the "suffix length"
411
+ if end is None:
412
+ end = -start
413
+ start = ""
414
+ suff = True
415
+ else:
416
+ size = size or (await self._info(url))["size"]
417
+ start = size + start
418
+ elif start is None:
419
+ start = 0
420
+ if not suff:
421
+ if end is not None and end < 0:
422
+ if start is not None:
423
+ size = size or (await self._info(url))["size"]
424
+ end = size + end
425
+ elif end is None:
426
+ end = ""
427
+ if isinstance(end, numbers.Integral):
428
+ end -= 1 # bytes range is inclusive
429
+ return f"bytes={start}-{end}"
430
+
431
+ async def _cat_file(self, path, start=None, end=None, **kwargs):
432
+ raise NotImplementedError
433
+
434
+ async def _cat(
435
+ self, path, recursive=False, on_error="raise", batch_size=None, **kwargs
436
+ ):
437
+ paths = await self._expand_path(path, recursive=recursive)
438
+ coros = [self._cat_file(path, **kwargs) for path in paths]
439
+ batch_size = batch_size or self.batch_size
440
+ out = await _run_coros_in_chunks(
441
+ coros, batch_size=batch_size, nofiles=True, return_exceptions=True
442
+ )
443
+ if on_error == "raise":
444
+ ex = next(filter(is_exception, out), False)
445
+ if ex:
446
+ raise ex
447
+ if (
448
+ len(paths) > 1
449
+ or isinstance(path, list)
450
+ or paths[0] != self._strip_protocol(path)
451
+ ):
452
+ return {
453
+ k: v
454
+ for k, v in zip(paths, out)
455
+ if on_error != "omit" or not is_exception(v)
456
+ }
457
+ else:
458
+ return out[0]
459
+
460
+ async def _cat_ranges(
461
+ self,
462
+ paths,
463
+ starts,
464
+ ends,
465
+ max_gap=None,
466
+ batch_size=None,
467
+ on_error="return",
468
+ **kwargs,
469
+ ):
470
+ """Get the contents of byte ranges from one or more files
471
+
472
+ Parameters
473
+ ----------
474
+ paths: list
475
+ A list of of filepaths on this filesystems
476
+ starts, ends: int or list
477
+ Bytes limits of the read. If using a single int, the same value will be
478
+ used to read all the specified files.
479
+ """
480
+ # TODO: on_error
481
+ if max_gap is not None:
482
+ # use utils.merge_offset_ranges
483
+ raise NotImplementedError
484
+ if not isinstance(paths, list):
485
+ raise TypeError
486
+ if not isinstance(starts, Iterable):
487
+ starts = [starts] * len(paths)
488
+ if not isinstance(ends, Iterable):
489
+ ends = [ends] * len(paths)
490
+ if len(starts) != len(paths) or len(ends) != len(paths):
491
+ raise ValueError
492
+ coros = [
493
+ self._cat_file(p, start=s, end=e, **kwargs)
494
+ for p, s, e in zip(paths, starts, ends)
495
+ ]
496
+ batch_size = batch_size or self.batch_size
497
+ return await _run_coros_in_chunks(
498
+ coros, batch_size=batch_size, nofiles=True, return_exceptions=True
499
+ )
500
+
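An illustrative sketch of these semantics through the synchronous `cat_ranges` of fsspec's in-memory filesystem; scalar `starts`/`ends` are broadcast to every path and `end` is exclusive (expected results shown as comments):

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe({"/a": b"0123456789", "/b": b"abcdefghij"})
    print(fs.cat_ranges(["/a", "/b"], starts=0, ends=4))   # [b'0123', b'abcd']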
501
+ async def _put_file(self, lpath, rpath, **kwargs):
502
+ raise NotImplementedError
503
+
504
+ async def _put(
505
+ self,
506
+ lpath,
507
+ rpath,
508
+ recursive=False,
509
+ callback=_DEFAULT_CALLBACK,
510
+ batch_size=None,
511
+ maxdepth=None,
512
+ **kwargs,
513
+ ):
514
+ """Copy file(s) from local.
515
+
516
+ Copies a specific file or tree of files (if recursive=True). If rpath
517
+ ends with a "/", it will be assumed to be a directory, and target files
518
+ will go within.
519
+
520
+ The put_file method will be called concurrently on a batch of files. The
521
+ batch_size option can configure the number of futures that can be executed
522
+ at the same time. If it is -1, then all the files will be uploaded concurrently.
523
+ The default can be set for this instance by passing "batch_size" in the
524
+ constructor, or for all instances by setting the "gather_batch_size" key
525
+ in ``fsspec.config.conf``, falling back to 1/8th of the system limit.
526
+ """
527
+ if isinstance(lpath, list) and isinstance(rpath, list):
528
+ # No need to expand paths when both source and destination
529
+ # are provided as lists
530
+ rpaths = rpath
531
+ lpaths = lpath
532
+ else:
533
+ source_is_str = isinstance(lpath, str)
534
+ if source_is_str:
535
+ lpath = make_path_posix(lpath)
536
+ fs = LocalFileSystem()
537
+ lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
538
+ if source_is_str and (not recursive or maxdepth is not None):
539
+ # Non-recursive glob does not copy directories
540
+ lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
541
+ if not lpaths:
542
+ return
543
+
544
+ source_is_file = len(lpaths) == 1
545
+ dest_is_dir = isinstance(rpath, str) and (
546
+ trailing_sep(rpath) or await self._isdir(rpath)
547
+ )
548
+
549
+ rpath = self._strip_protocol(rpath)
550
+ exists = source_is_str and (
551
+ (has_magic(lpath) and source_is_file)
552
+ or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
553
+ )
554
+ rpaths = other_paths(
555
+ lpaths,
556
+ rpath,
557
+ exists=exists,
558
+ flatten=not source_is_str,
559
+ )
560
+
561
+ is_dir = {l: os.path.isdir(l) for l in lpaths}
562
+ rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]]
563
+ file_pairs = [(l, r) for l, r in zip(lpaths, rpaths) if not is_dir[l]]
564
+
565
+ await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs])
566
+ batch_size = batch_size or self.batch_size
567
+
568
+ coros = []
569
+ callback.set_size(len(file_pairs))
570
+ for lfile, rfile in file_pairs:
571
+ callback.branch(lfile, rfile, kwargs)
572
+ coros.append(self._put_file(lfile, rfile, **kwargs))
573
+
574
+ return await _run_coros_in_chunks(
575
+ coros, batch_size=batch_size, callback=callback
576
+ )
577
+
578
+ async def _get_file(self, rpath, lpath, **kwargs):
579
+ raise NotImplementedError
580
+
581
+ async def _get(
582
+ self,
583
+ rpath,
584
+ lpath,
585
+ recursive=False,
586
+ callback=_DEFAULT_CALLBACK,
587
+ maxdepth=None,
588
+ **kwargs,
589
+ ):
590
+ """Copy file(s) to local.
591
+
592
+ Copies a specific file or tree of files (if recursive=True). If lpath
593
+ ends with a "/", it will be assumed to be a directory, and target files
594
+ will go within. Can submit a list of paths, which may be glob-patterns
595
+ and will be expanded.
596
+
597
+ The get_file method will be called concurrently on a batch of files. The
598
+ batch_size option can configure the number of futures that can be executed
599
+ at the same time. If it is -1, then all the files will be downloaded concurrently.
600
+ The default can be set for this instance by passing "batch_size" in the
601
+ constructor, or for all instances by setting the "gather_batch_size" key
602
+ in ``fsspec.config.conf``, falling back to 1/8th of the system limit.
603
+ """
604
+ if isinstance(lpath, list) and isinstance(rpath, list):
605
+ # No need to expand paths when both source and destination
606
+ # are provided as lists
607
+ rpaths = rpath
608
+ lpaths = lpath
609
+ else:
610
+ source_is_str = isinstance(rpath, str)
611
+ # First check for rpath trailing slash as _strip_protocol removes it.
612
+ source_not_trailing_sep = source_is_str and not trailing_sep(rpath)
613
+ rpath = self._strip_protocol(rpath)
614
+ rpaths = await self._expand_path(
615
+ rpath, recursive=recursive, maxdepth=maxdepth
616
+ )
617
+ if source_is_str and (not recursive or maxdepth is not None):
618
+ # Non-recursive glob does not copy directories
619
+ rpaths = [
620
+ p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
621
+ ]
622
+ if not rpaths:
623
+ return
624
+
625
+ lpath = make_path_posix(lpath)
626
+ source_is_file = len(rpaths) == 1
627
+ dest_is_dir = isinstance(lpath, str) and (
628
+ trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
629
+ )
630
+
631
+ exists = source_is_str and (
632
+ (has_magic(rpath) and source_is_file)
633
+ or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep)
634
+ )
635
+ lpaths = other_paths(
636
+ rpaths,
637
+ lpath,
638
+ exists=exists,
639
+ flatten=not source_is_str,
640
+ )
641
+
642
+ [os.makedirs(os.path.dirname(lp), exist_ok=True) for lp in lpaths]
643
+ batch_size = kwargs.pop("batch_size", self.batch_size)
644
+
645
+ coros = []
646
+ callback.set_size(len(lpaths))
647
+ for lpath, rpath in zip(lpaths, rpaths):
648
+ callback.branch(rpath, lpath, kwargs)
649
+ coros.append(self._get_file(rpath, lpath, **kwargs))
650
+ return await _run_coros_in_chunks(
651
+ coros, batch_size=batch_size, callback=callback
652
+ )
653
+
654
+ async def _isfile(self, path):
655
+ try:
656
+ return (await self._info(path))["type"] == "file"
657
+ except: # noqa: E722
658
+ return False
659
+
660
+ async def _isdir(self, path):
661
+ try:
662
+ return (await self._info(path))["type"] == "directory"
663
+ except OSError:
664
+ return False
665
+
666
+ async def _size(self, path):
667
+ return (await self._info(path)).get("size", None)
668
+
669
+ async def _sizes(self, paths, batch_size=None):
670
+ batch_size = batch_size or self.batch_size
671
+ return await _run_coros_in_chunks(
672
+ [self._size(p) for p in paths], batch_size=batch_size
673
+ )
674
+
675
+ async def _exists(self, path, **kwargs):
676
+ try:
677
+ await self._info(path, **kwargs)
678
+ return True
679
+ except FileNotFoundError:
680
+ return False
681
+
682
+ async def _info(self, path, **kwargs):
683
+ raise NotImplementedError
684
+
685
+ async def _ls(self, path, detail=True, **kwargs):
686
+ raise NotImplementedError
687
+
688
+ async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
689
+ if maxdepth is not None and maxdepth < 1:
690
+ raise ValueError("maxdepth must be at least 1")
691
+
692
+ path = self._strip_protocol(path)
693
+ full_dirs = {}
694
+ dirs = {}
695
+ files = {}
696
+
697
+ detail = kwargs.pop("detail", False)
698
+ try:
699
+ listing = await self._ls(path, detail=True, **kwargs)
700
+ except (FileNotFoundError, OSError) as e:
701
+ if on_error == "raise":
702
+ raise
703
+ elif callable(on_error):
704
+ on_error(e)
705
+ if detail:
706
+ yield path, {}, {}
707
+ else:
708
+ yield path, [], []
709
+ return
710
+
711
+ for info in listing:
712
+ # each info name must be at least [path]/part , but here
713
+ # we check also for names like [path]/part/
714
+ pathname = info["name"].rstrip("/")
715
+ name = pathname.rsplit("/", 1)[-1]
716
+ if info["type"] == "directory" and pathname != path:
717
+ # do not include "self" path
718
+ full_dirs[name] = pathname
719
+ dirs[name] = info
720
+ elif pathname == path:
721
+ # file-like with same name as given path
722
+ files[""] = info
723
+ else:
724
+ files[name] = info
725
+
726
+ if detail:
727
+ yield path, dirs, files
728
+ else:
729
+ yield path, list(dirs), list(files)
730
+
731
+ if maxdepth is not None:
732
+ maxdepth -= 1
733
+ if maxdepth < 1:
734
+ return
735
+
736
+ for d in dirs:
737
+ async for _ in self._walk(
738
+ full_dirs[d], maxdepth=maxdepth, detail=detail, **kwargs
739
+ ):
740
+ yield _
741
+
742
+ async def _glob(self, path, maxdepth=None, **kwargs):
743
+ if maxdepth is not None and maxdepth < 1:
744
+ raise ValueError("maxdepth must be at least 1")
745
+
746
+ import re
747
+
748
+ seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
749
+ ends_with_sep = path.endswith(seps) # _strip_protocol strips trailing slash
750
+ path = self._strip_protocol(path)
751
+ append_slash_to_dirname = ends_with_sep or path.endswith(
752
+ tuple(sep + "**" for sep in seps)
753
+ )
754
+ idx_star = path.find("*") if path.find("*") >= 0 else len(path)
755
+ idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
756
+ idx_brace = path.find("[") if path.find("[") >= 0 else len(path)
757
+
758
+ min_idx = min(idx_star, idx_qmark, idx_brace)
759
+
760
+ detail = kwargs.pop("detail", False)
761
+
762
+ if not has_magic(path):
763
+ if await self._exists(path, **kwargs):
764
+ if not detail:
765
+ return [path]
766
+ else:
767
+ return {path: await self._info(path, **kwargs)}
768
+ else:
769
+ if not detail:
770
+ return [] # glob of non-existent returns empty
771
+ else:
772
+ return {}
773
+ elif "/" in path[:min_idx]:
774
+ min_idx = path[:min_idx].rindex("/")
775
+ root = path[: min_idx + 1]
776
+ depth = path[min_idx + 1 :].count("/") + 1
777
+ else:
778
+ root = ""
779
+ depth = path[min_idx + 1 :].count("/") + 1
780
+
781
+ if "**" in path:
782
+ if maxdepth is not None:
783
+ idx_double_stars = path.find("**")
784
+ depth_double_stars = path[idx_double_stars:].count("/") + 1
785
+ depth = depth - depth_double_stars + maxdepth
786
+ else:
787
+ depth = None
788
+
789
+ allpaths = await self._find(
790
+ root, maxdepth=depth, withdirs=True, detail=True, **kwargs
791
+ )
792
+
793
+ pattern = glob_translate(path + ("/" if ends_with_sep else ""))
794
+ pattern = re.compile(pattern)
795
+
796
+ out = {
797
+ p: info
798
+ for p, info in sorted(allpaths.items())
799
+ if pattern.match(
800
+ (
801
+ p + "/"
802
+ if append_slash_to_dirname and info["type"] == "directory"
803
+ else p
804
+ )
805
+ )
806
+ }
807
+
808
+ if detail:
809
+ return out
810
+ else:
811
+ return list(out)
812
+
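An illustrative sketch of the globbing behaviour, via the synchronous equivalent on the in-memory filesystem (expected results shown as comments):

    import fsspec

    fs = fsspec.filesystem("memory")
    fs.pipe({"/data/a.csv": b"", "/data/b.txt": b"", "/data/sub/c.csv": b""})
    print(fs.glob("/data/*.csv"))       # ['/data/a.csv']  (single level only)
    print(fs.glob("/data/**/*.csv"))    # also matches '/data/sub/c.csv'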
813
+ async def _du(self, path, total=True, maxdepth=None, **kwargs):
814
+ sizes = {}
815
+ # async for?
816
+ for f in await self._find(path, maxdepth=maxdepth, **kwargs):
817
+ info = await self._info(f)
818
+ sizes[info["name"]] = info["size"]
819
+ if total:
820
+ return sum(sizes.values())
821
+ else:
822
+ return sizes
823
+
824
+ async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
825
+ path = self._strip_protocol(path)
826
+ out = {}
827
+ detail = kwargs.pop("detail", False)
828
+
829
+ # Add the root directory if withdirs is requested
830
+ # This is needed for posix glob compliance
831
+ if withdirs and path != "" and await self._isdir(path):
832
+ out[path] = await self._info(path)
833
+
834
+ # async for?
835
+ async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs):
836
+ if withdirs:
837
+ files.update(dirs)
838
+ out.update({info["name"]: info for name, info in files.items()})
839
+ if not out and (await self._isfile(path)):
840
+ # walk works on directories, but find should also return [path]
841
+ # when path happens to be a file
842
+ out[path] = {}
843
+ names = sorted(out)
844
+ if not detail:
845
+ return names
846
+ else:
847
+ return {name: out[name] for name in names}
848
+
849
+ async def _expand_path(self, path, recursive=False, maxdepth=None):
850
+ if maxdepth is not None and maxdepth < 1:
851
+ raise ValueError("maxdepth must be at least 1")
852
+
853
+ if isinstance(path, str):
854
+ out = await self._expand_path([path], recursive, maxdepth)
855
+ else:
856
+ out = set()
857
+ path = [self._strip_protocol(p) for p in path]
858
+ for p in path: # can gather here
859
+ if has_magic(p):
860
+ bit = set(await self._glob(p, maxdepth=maxdepth))
861
+ out |= bit
862
+ if recursive:
863
+ # glob call above expanded one depth so if maxdepth is defined
864
+ # then decrement it in expand_path call below. If it is zero
865
+ # after decrementing then avoid expand_path call.
866
+ if maxdepth is not None and maxdepth <= 1:
867
+ continue
868
+ out |= set(
869
+ await self._expand_path(
870
+ list(bit),
871
+ recursive=recursive,
872
+ maxdepth=maxdepth - 1 if maxdepth is not None else None,
873
+ )
874
+ )
875
+ continue
876
+ elif recursive:
877
+ rec = set(await self._find(p, maxdepth=maxdepth, withdirs=True))
878
+ out |= rec
879
+ if p not in out and (recursive is False or (await self._exists(p))):
880
+ # should only check once, for the root
881
+ out.add(p)
882
+ if not out:
883
+ raise FileNotFoundError(path)
884
+ return sorted(out)
885
+
886
+ async def _mkdir(self, path, create_parents=True, **kwargs):
887
+ pass # not necessary to implement, may not have directories
888
+
889
+ async def _makedirs(self, path, exist_ok=False):
890
+ pass # not necessary to implement, may not have directories
891
+
892
+ async def open_async(self, path, mode="rb", **kwargs):
893
+ if "b" not in mode or kwargs.get("compression"):
894
+ raise ValueError
895
+ raise NotImplementedError
896
+
897
+
898
+ def mirror_sync_methods(obj):
899
+ """Populate sync and async methods for obj
900
+
901
+ For each method, this will create a sync version if the name refers to an async method
902
+ (coroutine) and there is no override in the child class; will create an async
903
+ method for the corresponding sync method if there is no implementation.
904
+
905
+ Uses the methods specified in
906
+ - async_methods: the set that an implementation is expected to provide
907
+ - default_async_methods: that can be derived from their sync version in
908
+ AbstractFileSystem
909
+ - AsyncFileSystem: async-specific default coroutines
910
+ """
911
+ from fsspec import AbstractFileSystem
912
+
913
+ for method in async_methods + dir(AsyncFileSystem):
914
+ if not method.startswith("_"):
915
+ continue
916
+ smethod = method[1:]
917
+ if private.match(method):
918
+ isco = inspect.iscoroutinefunction(getattr(obj, method, None))
919
+ unsync = getattr(getattr(obj, smethod, False), "__func__", None)
920
+ is_default = unsync is getattr(AbstractFileSystem, smethod, "")
921
+ if isco and is_default:
922
+ mth = sync_wrapper(getattr(obj, method), obj=obj)
923
+ setattr(obj, smethod, mth)
924
+ if not mth.__doc__:
925
+ mth.__doc__ = getattr(
926
+ getattr(AbstractFileSystem, smethod, None), "__doc__", ""
927
+ )
928
+
929
+
930
+ class FSSpecCoroutineCancel(Exception):
931
+ pass
932
+
933
+
934
+ def _dump_running_tasks(
935
+ printout=True, cancel=True, exc=FSSpecCoroutineCancel, with_task=False
936
+ ):
937
+ import traceback
938
+
939
+ tasks = [t for t in asyncio.tasks.all_tasks(loop[0]) if not t.done()]
940
+ if printout:
941
+ [task.print_stack() for task in tasks]
942
+ out = [
943
+ {
944
+ "locals": task._coro.cr_frame.f_locals,
945
+ "file": task._coro.cr_frame.f_code.co_filename,
946
+ "firstline": task._coro.cr_frame.f_code.co_firstlineno,
947
+ "linelo": task._coro.cr_frame.f_lineno,
948
+ "stack": traceback.format_stack(task._coro.cr_frame),
949
+ "task": task if with_task else None,
950
+ }
951
+ for task in tasks
952
+ ]
953
+ if cancel:
954
+ for t in tasks:
955
+ cbs = t._callbacks
956
+ t.cancel()
957
+ asyncio.futures.Future.set_exception(t, exc)
958
+ asyncio.futures.Future.cancel(t)
959
+ [cb[0](t) for cb in cbs] # cancels any dependent concurrent.futures
960
+ try:
961
+ t._coro.throw(exc) # exits coro, unless explicitly handled
962
+ except exc:
963
+ pass
964
+ return out
965
+
966
+
967
+ class AbstractAsyncStreamedFile(AbstractBufferedFile):
968
+ # no read buffering, and always auto-commit
969
+ # TODO: readahead might still be useful here, but needs async version
970
+
971
+ async def read(self, length=-1):
972
+ """
973
+ Return data from cache, or fetch pieces as necessary
974
+
975
+ Parameters
976
+ ----------
977
+ length: int (-1)
978
+ Number of bytes to read; if <0, all remaining bytes.
979
+ """
980
+ length = -1 if length is None else int(length)
981
+ if self.mode != "rb":
982
+ raise ValueError("File not in read mode")
983
+ if length < 0:
984
+ length = self.size - self.loc
985
+ if self.closed:
986
+ raise ValueError("I/O operation on closed file.")
987
+ if length == 0:
988
+ # don't even bother calling fetch
989
+ return b""
990
+ out = await self._fetch_range(self.loc, self.loc + length)
991
+ self.loc += len(out)
992
+ return out
993
+
994
+ async def write(self, data):
995
+ """
996
+ Write data to buffer.
997
+
998
+ Buffer only sent on flush() or if buffer is greater than
999
+ or equal to blocksize.
1000
+
1001
+ Parameters
1002
+ ----------
1003
+ data: bytes
1004
+ Set of bytes to be written.
1005
+ """
1006
+ if self.mode not in {"wb", "ab"}:
1007
+ raise ValueError("File not in write mode")
1008
+ if self.closed:
1009
+ raise ValueError("I/O operation on closed file.")
1010
+ if self.forced:
1011
+ raise ValueError("This file has been force-flushed, can only close")
1012
+ out = self.buffer.write(data)
1013
+ self.loc += out
1014
+ if self.buffer.tell() >= self.blocksize:
1015
+ await self.flush()
1016
+ return out
1017
+
1018
+ async def close(self):
1019
+ """Close file
1020
+
1021
+ Finalizes writes, discards cache
1022
+ """
1023
+ if getattr(self, "_unclosable", False):
1024
+ return
1025
+ if self.closed:
1026
+ return
1027
+ if self.mode == "rb":
1028
+ self.cache = None
1029
+ else:
1030
+ if not self.forced:
1031
+ await self.flush(force=True)
1032
+
1033
+ if self.fs is not None:
1034
+ self.fs.invalidate_cache(self.path)
1035
+ self.fs.invalidate_cache(self.fs._parent(self.path))
1036
+
1037
+ self.closed = True
1038
+
1039
+ async def flush(self, force=False):
1040
+ if self.closed:
1041
+ raise ValueError("Flush on closed file")
1042
+ if force and self.forced:
1043
+ raise ValueError("Force flush cannot be called more than once")
1044
+ if force:
1045
+ self.forced = True
1046
+
1047
+ if self.mode not in {"wb", "ab"}:
1048
+ # no-op to flush on read-mode
1049
+ return
1050
+
1051
+ if not force and self.buffer.tell() < self.blocksize:
1052
+ # Defer write on small block
1053
+ return
1054
+
1055
+ if self.offset is None:
1056
+ # Initialize a multipart upload
1057
+ self.offset = 0
1058
+ try:
1059
+ await self._initiate_upload()
1060
+ except: # noqa: E722
1061
+ self.closed = True
1062
+ raise
1063
+
1064
+ if await self._upload_chunk(final=force) is not False:
1065
+ self.offset += self.buffer.seek(0, 2)
1066
+ self.buffer = io.BytesIO()
1067
+
1068
+ async def __aenter__(self):
1069
+ return self
1070
+
1071
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
1072
+ await self.close()
1073
+
1074
+ async def _fetch_range(self, start, end):
1075
+ raise NotImplementedError
1076
+
1077
+ async def _initiate_upload(self):
1078
+ pass
1079
+
1080
+ async def _upload_chunk(self, final=False):
1081
+ raise NotImplementedError
lib/python3.11/site-packages/fsspec/caching.py ADDED
@@ -0,0 +1,875 @@
1
+ from __future__ import annotations
2
+
3
+ import collections
4
+ import functools
5
+ import logging
6
+ import math
7
+ import os
8
+ import threading
9
+ import warnings
10
+ from concurrent.futures import Future, ThreadPoolExecutor
11
+ from typing import (
12
+ TYPE_CHECKING,
13
+ Any,
14
+ Callable,
15
+ ClassVar,
16
+ Generic,
17
+ NamedTuple,
18
+ OrderedDict,
19
+ TypeVar,
20
+ )
21
+
22
+ if TYPE_CHECKING:
23
+ import mmap
24
+
25
+ from typing_extensions import ParamSpec
26
+
27
+ P = ParamSpec("P")
28
+ else:
29
+ P = TypeVar("P")
30
+
31
+ T = TypeVar("T")
32
+
33
+
34
+ logger = logging.getLogger("fsspec")
35
+
36
+ Fetcher = Callable[[int, int], bytes] # Maps (start, end) to bytes
37
+
38
+
39
+ class BaseCache:
40
+ """Pass-through cache: doesn't keep anything, calls every time
41
+
42
+ Acts as base class for other cachers
43
+
44
+ Parameters
45
+ ----------
46
+ blocksize: int
47
+ How far to read ahead in numbers of bytes
48
+ fetcher: func
49
+ Function of the form f(start, end) which gets bytes from remote as
50
+ specified
51
+ size: int
52
+ How big this file is
53
+ """
54
+
55
+ name: ClassVar[str] = "none"
56
+
57
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
58
+ self.blocksize = blocksize
59
+ self.fetcher = fetcher
60
+ self.size = size
61
+
62
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
63
+ if start is None:
64
+ start = 0
65
+ if stop is None:
66
+ stop = self.size
67
+ if start >= self.size or start >= stop:
68
+ return b""
69
+ return self.fetcher(start, stop)
70
+
71
+
72
+ class MMapCache(BaseCache):
73
+ """memory-mapped sparse file cache
74
+
75
+ Opens temporary file, which is filled blocks-wise when data is requested.
76
+ Ensure there is enough disc space in the temporary location.
77
+
78
+ This cache method might only work on posix
79
+ """
80
+
81
+ name = "mmap"
82
+
83
+ def __init__(
84
+ self,
85
+ blocksize: int,
86
+ fetcher: Fetcher,
87
+ size: int,
88
+ location: str | None = None,
89
+ blocks: set[int] | None = None,
90
+ ) -> None:
91
+ super().__init__(blocksize, fetcher, size)
92
+ self.blocks = set() if blocks is None else blocks
93
+ self.location = location
94
+ self.cache = self._makefile()
95
+
96
+ def _makefile(self) -> mmap.mmap | bytearray:
97
+ import mmap
98
+ import tempfile
99
+
100
+ if self.size == 0:
101
+ return bytearray()
102
+
103
+ # posix version
104
+ if self.location is None or not os.path.exists(self.location):
105
+ if self.location is None:
106
+ fd = tempfile.TemporaryFile()
107
+ self.blocks = set()
108
+ else:
109
+ fd = open(self.location, "wb+")
110
+ fd.seek(self.size - 1)
111
+ fd.write(b"1")
112
+ fd.flush()
113
+ else:
114
+ fd = open(self.location, "r+b")
115
+
116
+ return mmap.mmap(fd.fileno(), self.size)
117
+
118
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
119
+ logger.debug(f"MMap cache fetching {start}-{end}")
120
+ if start is None:
121
+ start = 0
122
+ if end is None:
123
+ end = self.size
124
+ if start >= self.size or start >= end:
125
+ return b""
126
+ start_block = start // self.blocksize
127
+ end_block = end // self.blocksize
128
+ need = [i for i in range(start_block, end_block + 1) if i not in self.blocks]
129
+ while need:
130
+ # TODO: not a for loop so we can consolidate blocks later to
131
+ # make fewer fetch calls; this could be parallel
132
+ i = need.pop(0)
133
+ sstart = i * self.blocksize
134
+ send = min(sstart + self.blocksize, self.size)
135
+ logger.debug(f"MMap get block #{i} ({sstart}-{send})")
136
+ self.cache[sstart:send] = self.fetcher(sstart, send)
137
+ self.blocks.add(i)
138
+
139
+ return self.cache[start:end]
140
+
141
+ def __getstate__(self) -> dict[str, Any]:
142
+ state = self.__dict__.copy()
143
+ # Remove the unpicklable entries.
144
+ del state["cache"]
145
+ return state
146
+
147
+ def __setstate__(self, state: dict[str, Any]) -> None:
148
+ # Restore instance attributes
149
+ self.__dict__.update(state)
150
+ self.cache = self._makefile()
151
+
152
+
153
+ class ReadAheadCache(BaseCache):
154
+ """Cache which reads only when we get beyond a block of data
155
+
156
+ This is a much simpler version of BytesCache, and does not attempt to
157
+ fill holes in the cache or keep fragments alive. It is best suited to
158
+ many small reads in a sequential order (e.g., reading lines from a file).
159
+ """
160
+
161
+ name = "readahead"
162
+
163
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
164
+ super().__init__(blocksize, fetcher, size)
165
+ self.cache = b""
166
+ self.start = 0
167
+ self.end = 0
168
+
169
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
170
+ if start is None:
171
+ start = 0
172
+ if end is None or end > self.size:
173
+ end = self.size
174
+ if start >= self.size or start >= end:
175
+ return b""
176
+ l = end - start
177
+ if start >= self.start and end <= self.end:
178
+ # cache hit
179
+ return self.cache[start - self.start : end - self.start]
180
+ elif self.start <= start < self.end:
181
+ # partial hit
182
+ part = self.cache[start - self.start :]
183
+ l -= len(part)
184
+ start = self.end
185
+ else:
186
+ # miss
187
+ part = b""
188
+ end = min(self.size, end + self.blocksize)
189
+ self.cache = self.fetcher(start, end) # new block replaces old
190
+ self.start = start
191
+ self.end = self.start + len(self.cache)
192
+ return part + self.cache[:l]
193
+
194
+
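A minimal, self-contained sketch of the fetcher contract these caches wrap; a bytes buffer stands in for the remote file:

    from fsspec.caching import ReadAheadCache

    data = bytes(range(256))
    fetcher = lambda start, stop: data[start:stop]      # pretend remote read
    cache = ReadAheadCache(blocksize=32, fetcher=fetcher, size=len(data))
    assert cache._fetch(0, 10) == data[:10]     # miss: reads ahead to byte 42
    assert cache._fetch(10, 20) == data[10:20]  # hit: served from the buffer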
195
+ class FirstChunkCache(BaseCache):
196
+ """Caches the first block of a file only
197
+
198
+ This may be useful for file types where the metadata is stored in the header,
199
+ but is randomly accessed.
200
+ """
201
+
202
+ name = "first"
203
+
204
+ def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
205
+ super().__init__(blocksize, fetcher, size)
206
+ self.cache: bytes | None = None
207
+
208
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
209
+ start = start or 0
210
+ end = end or self.size
211
+ if start < self.blocksize:
212
+ if self.cache is None:
213
+ if end > self.blocksize:
214
+ data = self.fetcher(0, end)
215
+ self.cache = data[: self.blocksize]
216
+ return data[start:]
217
+ self.cache = self.fetcher(0, self.blocksize)
218
+ part = self.cache[start:end]
219
+ if end > self.blocksize:
220
+ part += self.fetcher(self.blocksize, end)
221
+ return part
222
+ else:
223
+ return self.fetcher(start, end)
224
+
225
+
226
+ class BlockCache(BaseCache):
227
+ """
228
+ Cache holding memory as a set of blocks.
229
+
230
+ Requests are only ever made ``blocksize`` at a time, and are
231
+ stored in an LRU cache. The least recently accessed block is
232
+ discarded when more than ``maxblocks`` are stored.
233
+
234
+ Parameters
235
+ ----------
236
+ blocksize : int
237
+ The number of bytes to store in each block.
238
+ Requests are only ever made for ``blocksize``, so this
239
+ should balance the overhead of making a request against
240
+ the granularity of the blocks.
241
+ fetcher : Callable
242
+ size : int
243
+ The total size of the file being cached.
244
+ maxblocks : int
245
+ The maximum number of blocks to cache for. The maximum memory
246
+ use for this cache is then ``blocksize * maxblocks``.
247
+ """
248
+
249
+ name = "blockcache"
250
+
251
+ def __init__(
252
+ self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
253
+ ) -> None:
254
+ super().__init__(blocksize, fetcher, size)
255
+ self.nblocks = math.ceil(size / blocksize)
256
+ self.maxblocks = maxblocks
257
+ self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)
258
+
259
+ def __repr__(self) -> str:
260
+ return (
261
+ f"<BlockCache blocksize={self.blocksize}, "
262
+ f"size={self.size}, nblocks={self.nblocks}>"
263
+ )
264
+
265
+ def cache_info(self):
266
+ """
267
+ The statistics on the block cache.
268
+
269
+ Returns
270
+ -------
271
+ NamedTuple
272
+ Returned directly from the LRU Cache used internally.
273
+ """
274
+ return self._fetch_block_cached.cache_info()
275
+
276
+ def __getstate__(self) -> dict[str, Any]:
277
+ state = self.__dict__
278
+ del state["_fetch_block_cached"]
279
+ return state
280
+
281
+ def __setstate__(self, state: dict[str, Any]) -> None:
282
+ self.__dict__.update(state)
283
+ self._fetch_block_cached = functools.lru_cache(state["maxblocks"])(
284
+ self._fetch_block
285
+ )
286
+
287
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
288
+ if start is None:
289
+ start = 0
290
+ if end is None:
291
+ end = self.size
292
+ if start >= self.size or start >= end:
293
+ return b""
294
+
295
+ # byte position -> block numbers
296
+ start_block_number = start // self.blocksize
297
+ end_block_number = end // self.blocksize
298
+
299
+ # these are cached, so safe to do multiple calls for the same start and end.
300
+ for block_number in range(start_block_number, end_block_number + 1):
301
+ self._fetch_block_cached(block_number)
302
+
303
+ return self._read_cache(
304
+ start,
305
+ end,
306
+ start_block_number=start_block_number,
307
+ end_block_number=end_block_number,
308
+ )
309
+
310
+ def _fetch_block(self, block_number: int) -> bytes:
311
+ """
312
+ Fetch the block of data for `block_number`.
313
+ """
314
+ if block_number > self.nblocks:
315
+ raise ValueError(
316
+ f"'block_number={block_number}' is greater than "
317
+ f"the number of blocks ({self.nblocks})"
318
+ )
319
+
320
+ start = block_number * self.blocksize
321
+ end = start + self.blocksize
322
+ logger.info("BlockCache fetching block %d", block_number)
323
+ block_contents = super()._fetch(start, end)
324
+ return block_contents
325
+
326
+ def _read_cache(
327
+ self, start: int, end: int, start_block_number: int, end_block_number: int
328
+ ) -> bytes:
329
+ """
330
+ Read from our block cache.
331
+
332
+ Parameters
333
+ ----------
334
+ start, end : int
335
+ The start and end byte positions.
336
+ start_block_number, end_block_number : int
337
+ The start and end block numbers.
338
+ """
339
+ start_pos = start % self.blocksize
340
+ end_pos = end % self.blocksize
341
+
342
+ if start_block_number == end_block_number:
343
+ block: bytes = self._fetch_block_cached(start_block_number)
344
+ return block[start_pos:end_pos]
345
+
346
+ else:
347
+ # read from the initial
348
+ out = []
349
+ out.append(self._fetch_block_cached(start_block_number)[start_pos:])
350
+
351
+ # intermediate blocks
352
+ # Note: it'd be nice to combine these into one big request. However
353
+ # that doesn't play nicely with our LRU cache.
354
+ for block_number in range(start_block_number + 1, end_block_number):
355
+ out.append(self._fetch_block_cached(block_number))
356
+
357
+ # final block
358
+ out.append(self._fetch_block_cached(end_block_number)[:end_pos])
359
+
360
+ return b"".join(out)
361
+
362
+
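A short sketch showing that BlockCache only ever asks the fetcher for whole blocks and exposes the LRU statistics via ``cache_info()``:

    from fsspec.caching import BlockCache

    data = bytes(1024)
    cache = BlockCache(blocksize=256, fetcher=lambda s, e: data[s:e], size=len(data))
    cache._fetch(0, 300)       # spans blocks 0 and 1, so two block fetches
    print(cache.cache_info())  # e.g. CacheInfo(hits=2, misses=2, maxsize=32, currsize=2)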
363
+ class BytesCache(BaseCache):
364
+ """Cache which holds data in an in-memory bytes object
365
+
366
+ Implements read-ahead by the block size, for semi-random reads progressing
367
+ through the file.
368
+
369
+ Parameters
370
+ ----------
371
+ trim: bool
372
+ As we read more data, whether to discard the start of the buffer when
373
+ we are more than a blocksize ahead of it.
374
+ """
375
+
376
+ name: ClassVar[str] = "bytes"
377
+
378
+ def __init__(
379
+ self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True
380
+ ) -> None:
381
+ super().__init__(blocksize, fetcher, size)
382
+ self.cache = b""
383
+ self.start: int | None = None
384
+ self.end: int | None = None
385
+ self.trim = trim
386
+
387
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
388
+ # TODO: only set start/end after fetch, in case it fails?
389
+ # is this where retry logic might go?
390
+ if start is None:
391
+ start = 0
392
+ if end is None:
393
+ end = self.size
394
+ if start >= self.size or start >= end:
395
+ return b""
396
+ if (
397
+ self.start is not None
398
+ and start >= self.start
399
+ and self.end is not None
400
+ and end < self.end
401
+ ):
402
+ # cache hit: we have all the required data
403
+ offset = start - self.start
404
+ return self.cache[offset : offset + end - start]
405
+
406
+ if self.blocksize:
407
+ bend = min(self.size, end + self.blocksize)
408
+ else:
409
+ bend = end
410
+
411
+ if bend == start or start > self.size:
412
+ return b""
413
+
414
+ if (self.start is None or start < self.start) and (
415
+ self.end is None or end > self.end
416
+ ):
417
+ # First read, or extending both before and after
418
+ self.cache = self.fetcher(start, bend)
419
+ self.start = start
420
+ else:
421
+ assert self.start is not None
422
+ assert self.end is not None
423
+
424
+ if start < self.start:
425
+ if self.end is None or self.end - end > self.blocksize:
426
+ self.cache = self.fetcher(start, bend)
427
+ self.start = start
428
+ else:
429
+ new = self.fetcher(start, self.start)
430
+ self.start = start
431
+ self.cache = new + self.cache
432
+ elif self.end is not None and bend > self.end:
433
+ if self.end > self.size:
434
+ pass
435
+ elif end - self.end > self.blocksize:
436
+ self.cache = self.fetcher(start, bend)
437
+ self.start = start
438
+ else:
439
+ new = self.fetcher(self.end, bend)
440
+ self.cache = self.cache + new
441
+
442
+ self.end = self.start + len(self.cache)
443
+ offset = start - self.start
444
+ out = self.cache[offset : offset + end - start]
445
+ if self.trim:
446
+ num = (self.end - self.start) // (self.blocksize + 1)
447
+ if num > 1:
448
+ self.start += self.blocksize * num
449
+ self.cache = self.cache[self.blocksize * num :]
450
+ return out
451
+
452
+ def __len__(self) -> int:
453
+ return len(self.cache)
454
+
455
+
456
+ class AllBytes(BaseCache):
457
+ """Cache entire contents of the file"""
458
+
459
+ name: ClassVar[str] = "all"
460
+
461
+ def __init__(
462
+ self,
463
+ blocksize: int | None = None,
464
+ fetcher: Fetcher | None = None,
465
+ size: int | None = None,
466
+ data: bytes | None = None,
467
+ ) -> None:
468
+ super().__init__(blocksize, fetcher, size) # type: ignore[arg-type]
469
+ if data is None:
470
+ data = self.fetcher(0, self.size)
471
+ self.data = data
472
+
473
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
474
+ return self.data[start:stop]
475
+
476
+
477
+ class KnownPartsOfAFile(BaseCache):
478
+ """
479
+ Cache holding known file parts.
480
+
481
+ Parameters
482
+ ----------
483
+ blocksize: int
484
+ How far to read ahead in numbers of bytes
485
+ fetcher: func
486
+ Function of the form f(start, end) which gets bytes from remote as
487
+ specified
488
+ size: int
489
+ How big this file is
490
+ data: dict
491
+ A dictionary mapping explicit `(start, stop)` file-offset tuples
492
+ to known bytes.
493
+ strict: bool, default True
494
+ Whether to fetch reads that go beyond a known byte-range boundary.
495
+ If `False`, any read that ends outside a known part will be zero
496
+ padded. Note that zero padding will not be used for reads that
497
+ begin outside a known byte-range.
498
+ """
499
+
500
+ name: ClassVar[str] = "parts"
501
+
502
+ def __init__(
503
+ self,
504
+ blocksize: int,
505
+ fetcher: Fetcher,
506
+ size: int,
507
+ data: dict[tuple[int, int], bytes] = {},
508
+ strict: bool = True,
509
+ **_: Any,
510
+ ):
511
+ super().__init__(blocksize, fetcher, size)
512
+ self.strict = strict
513
+
514
+ # simple consolidation of contiguous blocks
515
+ if data:
516
+ old_offsets = sorted(data.keys())
517
+ offsets = [old_offsets[0]]
518
+ blocks = [data.pop(old_offsets[0])]
519
+ for start, stop in old_offsets[1:]:
520
+ start0, stop0 = offsets[-1]
521
+ if start == stop0:
522
+ offsets[-1] = (start0, stop)
523
+ blocks[-1] += data.pop((start, stop))
524
+ else:
525
+ offsets.append((start, stop))
526
+ blocks.append(data.pop((start, stop)))
527
+
528
+ self.data = dict(zip(offsets, blocks))
529
+ else:
530
+ self.data = data
531
+
532
+ def _fetch(self, start: int | None, stop: int | None) -> bytes:
533
+ if start is None:
534
+ start = 0
535
+ if stop is None:
536
+ stop = self.size
537
+
538
+ out = b""
539
+ for (loc0, loc1), data in self.data.items():
540
+ # If self.strict=False, use zero-padded data
541
+ # for reads beyond the end of a "known" buffer
542
+ if loc0 <= start < loc1:
543
+ off = start - loc0
544
+ out = data[off : off + stop - start]
545
+ if not self.strict or loc0 <= stop <= loc1:
546
+ # The request is within a known range, or
547
+ # it begins within a known range, and we
548
+ # are allowed to pad reads beyond the
549
+ # buffer with zero
550
+ out += b"\x00" * (stop - start - len(out))
551
+ return out
552
+ else:
553
+ # The request ends outside a known range,
554
+ # and we are being "strict" about reads
555
+ # beyond the buffer
556
+ start = loc1
557
+ break
558
+
559
+ # We only get here if there is a request outside the
560
+ # known parts of the file. In an ideal world, this
561
+ # should never happen
562
+ if self.fetcher is None:
563
+ # We cannot fetch the data, so raise an error
564
+ raise ValueError(f"Read is outside the known file parts: {(start, stop)}. ")
565
+ # We can fetch the data, but should warn the user
566
+ # that this may be slow
567
+ warnings.warn(
568
+ f"Read is outside the known file parts: {(start, stop)}. "
569
+ f"IO/caching performance may be poor!"
570
+ )
571
+ logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
572
+ return out + super()._fetch(start, stop)
573
+
574
+
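A minimal sketch of serving reads purely from pre-known byte ranges, with no fetcher to fall back to; adjacent parts are merged at construction time:

    from fsspec.caching import KnownPartsOfAFile

    cache = KnownPartsOfAFile(
        blocksize=0,
        fetcher=None,
        size=16,
        data={(0, 8): b"01234567", (8, 16): b"89abcdef"},  # contiguous -> merged
    )
    assert cache._fetch(4, 12) == b"456789ab"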
575
+ class UpdatableLRU(Generic[P, T]):
576
+ """
577
+ Custom implementation of LRU cache that allows updating keys
578
+
579
+ Used by BackgroundBlockCache
580
+ """
581
+
582
+ class CacheInfo(NamedTuple):
583
+ hits: int
584
+ misses: int
585
+ maxsize: int
586
+ currsize: int
587
+
588
+ def __init__(self, func: Callable[P, T], max_size: int = 128) -> None:
589
+ self._cache: OrderedDict[Any, T] = collections.OrderedDict()
590
+ self._func = func
591
+ self._max_size = max_size
592
+ self._hits = 0
593
+ self._misses = 0
594
+ self._lock = threading.Lock()
595
+
596
+ def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T:
597
+ if kwargs:
598
+ raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}")
599
+ with self._lock:
600
+ if args in self._cache:
601
+ self._cache.move_to_end(args)
602
+ self._hits += 1
603
+ return self._cache[args]
604
+
605
+ result = self._func(*args, **kwargs)
606
+
607
+ with self._lock:
608
+ self._cache[args] = result
609
+ self._misses += 1
610
+ if len(self._cache) > self._max_size:
611
+ self._cache.popitem(last=False)
612
+
613
+ return result
614
+
615
+ def is_key_cached(self, *args: Any) -> bool:
616
+ with self._lock:
617
+ return args in self._cache
618
+
619
+ def add_key(self, result: T, *args: Any) -> None:
620
+ with self._lock:
621
+ self._cache[args] = result
622
+ if len(self._cache) > self._max_size:
623
+ self._cache.popitem(last=False)
624
+
625
+ def cache_info(self) -> UpdatableLRU.CacheInfo:
626
+ with self._lock:
627
+ return self.CacheInfo(
628
+ maxsize=self._max_size,
629
+ currsize=len(self._cache),
630
+ hits=self._hits,
631
+ misses=self._misses,
632
+ )
633
+
634
+
635
+ class BackgroundBlockCache(BaseCache):
636
+ """
637
+ Cache holding memory as a set of blocks with pre-loading of
638
+ the next block in the background.
639
+
640
+ Requests are only ever made ``blocksize`` at a time, and are
641
+ stored in an LRU cache. The least recently accessed block is
642
+ discarded when more than ``maxblocks`` are stored. If the
643
+ next block is not in cache, it is loaded in a separate thread
644
+ in a non-blocking way.
645
+
646
+ Parameters
647
+ ----------
648
+ blocksize : int
649
+ The number of bytes to store in each block.
650
+ Requests are only ever made for ``blocksize``, so this
651
+ should balance the overhead of making a request against
652
+ the granularity of the blocks.
653
+ fetcher : Callable
654
+ size : int
655
+ The total size of the file being cached.
656
+ maxblocks : int
657
+ The maximum number of blocks to cache for. The maximum memory
658
+ use for this cache is then ``blocksize * maxblocks``.
659
+ """
660
+
661
+ name: ClassVar[str] = "background"
662
+
663
+ def __init__(
664
+ self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
665
+ ) -> None:
666
+ super().__init__(blocksize, fetcher, size)
667
+ self.nblocks = math.ceil(size / blocksize)
668
+ self.maxblocks = maxblocks
669
+ self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks)
670
+
671
+ self._thread_executor = ThreadPoolExecutor(max_workers=1)
672
+ self._fetch_future_block_number: int | None = None
673
+ self._fetch_future: Future[bytes] | None = None
674
+ self._fetch_future_lock = threading.Lock()
675
+
676
+ def __repr__(self) -> str:
677
+ return (
678
+ f"<BackgroundBlockCache blocksize={self.blocksize}, "
679
+ f"size={self.size}, nblocks={self.nblocks}>"
680
+ )
681
+
682
+ def cache_info(self) -> UpdatableLRU.CacheInfo:
683
+ """
684
+ The statistics on the block cache.
685
+
686
+ Returns
687
+ -------
688
+ NamedTuple
689
+ Returned directly from the LRU Cache used internally.
690
+ """
691
+ return self._fetch_block_cached.cache_info()
692
+
693
+ def __getstate__(self) -> dict[str, Any]:
694
+ state = self.__dict__
695
+ del state["_fetch_block_cached"]
696
+ del state["_thread_executor"]
697
+ del state["_fetch_future_block_number"]
698
+ del state["_fetch_future"]
699
+ del state["_fetch_future_lock"]
700
+ return state
701
+
702
+ def __setstate__(self, state) -> None:
703
+ self.__dict__.update(state)
704
+ self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"])
705
+ self._thread_executor = ThreadPoolExecutor(max_workers=1)
706
+ self._fetch_future_block_number = None
707
+ self._fetch_future = None
708
+ self._fetch_future_lock = threading.Lock()
709
+
710
+ def _fetch(self, start: int | None, end: int | None) -> bytes:
711
+ if start is None:
712
+ start = 0
713
+ if end is None:
714
+ end = self.size
715
+ if start >= self.size or start >= end:
716
+ return b""
717
+
718
+ # byte position -> block numbers
719
+ start_block_number = start // self.blocksize
720
+ end_block_number = end // self.blocksize
721
+
722
+ fetch_future_block_number = None
723
+ fetch_future = None
724
+ with self._fetch_future_lock:
725
+ # Background thread is running. Check whether we can or must join it.
726
+ if self._fetch_future is not None:
727
+ assert self._fetch_future_block_number is not None
728
+ if self._fetch_future.done():
729
+ logger.info("BlockCache joined background fetch without waiting.")
730
+ self._fetch_block_cached.add_key(
731
+ self._fetch_future.result(), self._fetch_future_block_number
732
+ )
733
+ # Cleanup the fetch variables. Done with fetching the block.
734
+ self._fetch_future_block_number = None
735
+ self._fetch_future = None
736
+ else:
737
+ # Must join if we need the block for the current fetch
738
+ must_join = bool(
739
+ start_block_number
740
+ <= self._fetch_future_block_number
741
+ <= end_block_number
742
+ )
743
+ if must_join:
744
+ # Copy to the local variables to release lock
745
+ # before waiting for result
746
+ fetch_future_block_number = self._fetch_future_block_number
747
+ fetch_future = self._fetch_future
748
+
749
+ # Cleanup the fetch variables. Have a local copy.
750
+ self._fetch_future_block_number = None
751
+ self._fetch_future = None
752
+
753
+ # Need to wait for the future for the current read
754
+ if fetch_future is not None:
755
+ logger.info("BlockCache waiting for background fetch.")
756
+ # Wait until result and put it in cache
757
+ self._fetch_block_cached.add_key(
758
+ fetch_future.result(), fetch_future_block_number
759
+ )
760
+
761
+ # these are cached, so safe to do multiple calls for the same start and end.
762
+ for block_number in range(start_block_number, end_block_number + 1):
763
+ self._fetch_block_cached(block_number)
764
+
765
+ # fetch next block in the background if nothing is running in the background,
766
+ # the block is within file and it is not already cached
767
+ end_block_plus_1 = end_block_number + 1
768
+ with self._fetch_future_lock:
769
+ if (
770
+ self._fetch_future is None
771
+ and end_block_plus_1 <= self.nblocks
772
+ and not self._fetch_block_cached.is_key_cached(end_block_plus_1)
773
+ ):
774
+ self._fetch_future_block_number = end_block_plus_1
775
+ self._fetch_future = self._thread_executor.submit(
776
+ self._fetch_block, end_block_plus_1, "async"
777
+ )
778
+
779
+ return self._read_cache(
780
+ start,
781
+ end,
782
+ start_block_number=start_block_number,
783
+ end_block_number=end_block_number,
784
+ )
785
+
786
+ def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes:
787
+ """
788
+ Fetch the block of data for `block_number`.
789
+ """
790
+ if block_number > self.nblocks:
791
+ raise ValueError(
792
+ f"'block_number={block_number}' is greater than "
793
+ f"the number of blocks ({self.nblocks})"
794
+ )
795
+
796
+ start = block_number * self.blocksize
797
+ end = start + self.blocksize
798
+ logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
799
+ block_contents = super()._fetch(start, end)
800
+ return block_contents
801
+
802
+ def _read_cache(
803
+ self, start: int, end: int, start_block_number: int, end_block_number: int
804
+ ) -> bytes:
805
+ """
806
+ Read from our block cache.
807
+
808
+ Parameters
809
+ ----------
810
+ start, end : int
811
+ The start and end byte positions.
812
+ start_block_number, end_block_number : int
813
+ The start and end block numbers.
814
+ """
815
+ start_pos = start % self.blocksize
816
+ end_pos = end % self.blocksize
817
+
818
+ if start_block_number == end_block_number:
819
+ block = self._fetch_block_cached(start_block_number)
820
+ return block[start_pos:end_pos]
821
+
822
+ else:
823
+ # read from the initial
824
+ out = []
825
+ out.append(self._fetch_block_cached(start_block_number)[start_pos:])
826
+
827
+ # intermediate blocks
828
+ # Note: it'd be nice to combine these into one big request. However
829
+ # that doesn't play nicely with our LRU cache.
830
+ for block_number in range(start_block_number + 1, end_block_number):
831
+ out.append(self._fetch_block_cached(block_number))
832
+
833
+ # final block
834
+ out.append(self._fetch_block_cached(end_block_number)[:end_pos])
835
+
836
+ return b"".join(out)
837
+
838
+
839
+ caches: dict[str | None, type[BaseCache]] = {
840
+ # one custom case
841
+ None: BaseCache,
842
+ }
843
+
844
+
845
+ def register_cache(cls: type[BaseCache], clobber: bool = False) -> None:
846
+ """'Register' cache implementation.
847
+
848
+ Parameters
849
+ ----------
850
+ clobber: bool, optional
851
+ If set to True (default is False), allow overwriting an existing
852
+ entry.
853
+
854
+ Raises
855
+ ------
856
+ ValueError
857
+ """
858
+ name = cls.name
859
+ if not clobber and name in caches:
860
+ raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}")
861
+ caches[name] = cls
862
+
863
+
864
+ for c in (
865
+ BaseCache,
866
+ MMapCache,
867
+ BytesCache,
868
+ ReadAheadCache,
869
+ BlockCache,
870
+ FirstChunkCache,
871
+ AllBytes,
872
+ KnownPartsOfAFile,
873
+ BackgroundBlockCache,
874
+ ):
875
+ register_cache(c)
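An illustrative sketch of registering a custom cache; the class and its "logging" name are made up for this example:

    from fsspec.caching import BaseCache, register_cache

    class LoggingCache(BaseCache):
        """Pass-through cache that prints every fetch (illustrative only)."""

        name = "logging"

        def _fetch(self, start, stop):
            print(f"fetching bytes {start}-{stop}")
            return super()._fetch(start, stop)

    register_cache(LoggingCache)
    # file-opening code that accepts cache_type can now select it, e.g.
    # fs.open(path, cache_type="logging") on filesystems whose _open
    # forwards cache_type to AbstractBufferedFile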
lib/python3.11/site-packages/fsspec/callbacks.py ADDED
@@ -0,0 +1,238 @@
1
+ class Callback:
2
+ """
3
+ Base class and interface for callback mechanism
4
+
5
+ This class can be used directly for monitoring file transfers by
6
+ providing ``callback=Callback(hooks=...)`` (see the ``hooks`` argument,
7
+ below), or subclassed for more specialised behaviour.
8
+
9
+ Parameters
10
+ ----------
11
+ size: int (optional)
12
+ Nominal quantity for the value that corresponds to a complete
13
+ transfer, e.g., total number of tiles or total number of
14
+ bytes
15
+ value: int (0)
16
+ Starting internal counter value
17
+ hooks: dict or None
18
+ A dict of named functions to be called on each update. The signature
19
+ of these must be ``f(size, value, **kwargs)``
20
+ """
21
+
22
+ def __init__(self, size=None, value=0, hooks=None, **kwargs):
23
+ self.size = size
24
+ self.value = value
25
+ self.hooks = hooks or {}
26
+ self.kw = kwargs
27
+
28
+ def set_size(self, size):
29
+ """
30
+ Set the internal maximum size attribute
31
+
32
+ Usually called if not initially set at instantiation. Note that this
33
+ triggers a ``call()``.
34
+
35
+ Parameters
36
+ ----------
37
+ size: int
38
+ """
39
+ self.size = size
40
+ self.call()
41
+
42
+ def absolute_update(self, value):
43
+ """
44
+ Set the internal value state
45
+
46
+ Triggers ``call()``
47
+
48
+ Parameters
49
+ ----------
50
+ value: int
51
+ """
52
+ self.value = value
53
+ self.call()
54
+
55
+ def relative_update(self, inc=1):
56
+ """
57
+ Delta increment the internal counter
58
+
59
+ Triggers ``call()``
60
+
61
+ Parameters
62
+ ----------
63
+ inc: int
64
+ """
65
+ self.value += inc
66
+ self.call()
67
+
68
+ def call(self, hook_name=None, **kwargs):
69
+ """
70
+ Execute hook(s) with current state
71
+
72
+ Each function is passed the internal size and current value
73
+
74
+ Parameters
75
+ ----------
76
+ hook_name: str or None
77
+ If given, execute on this hook
78
+ kwargs: passed on to (all) hook(s)
79
+ """
80
+ if not self.hooks:
81
+ return
82
+ kw = self.kw.copy()
83
+ kw.update(kwargs)
84
+ if hook_name:
85
+ if hook_name not in self.hooks:
86
+ return
87
+ return self.hooks[hook_name](self.size, self.value, **kw)
88
+ for hook in self.hooks.values() or []:
89
+ hook(self.size, self.value, **kw)
90
+
91
+ def wrap(self, iterable):
92
+ """
93
+ Wrap an iterable to call ``relative_update`` on each iterations
94
+
95
+ Parameters
96
+ ----------
97
+ iterable: Iterable
98
+ The iterable that is being wrapped
99
+ """
100
+ for item in iterable:
101
+ self.relative_update()
102
+ yield item
103
+
104
+ def branch(self, path_1, path_2, kwargs):
105
+ """
106
+ Set callbacks for child transfers
107
+
108
+ If this callback is operating at a higher level, e.g., put, which may
109
+ trigger transfers that can also be monitored. The passed kwargs are
110
+ to be *mutated* to add ``callback=``, if this class supports branching
111
+ to children.
112
+
113
+ Parameters
114
+ ----------
115
+ path_1: str
116
+ Child's source path
117
+ path_2: str
118
+ Child's destination path
119
+ kwargs: dict
120
+ arguments passed to child method, e.g., put_file.
121
+
122
+ Returns
123
+ -------
124
+
125
+ """
126
+ return None
127
+
128
+ def no_op(self, *_, **__):
129
+ pass
130
+
131
+ def __getattr__(self, item):
132
+ """
133
+ If undefined methods are called on this class, nothing happens
134
+ """
135
+ return self.no_op
136
+
137
+ @classmethod
138
+ def as_callback(cls, maybe_callback=None):
139
+ """Transform callback=... into Callback instance
140
+
141
+ For the special value of ``None``, return the global instance of
142
+ ``NoOpCallback``. This is an alternative to including
143
+ ``callback=_DEFAULT_CALLBACK`` directly in a method signature.
144
+ """
145
+ if maybe_callback is None:
146
+ return _DEFAULT_CALLBACK
147
+ return maybe_callback
148
+
149
+
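A minimal sketch of driving the hook mechanism directly; the same instance could be passed as ``callback=`` to ``get``/``put``:

    from fsspec.callbacks import Callback

    def show(size, value, **kwargs):
        if size:
            print(f"{value}/{size} ({100 * value / size:.0f}%)")

    cb = Callback(hooks={"progress": show})
    cb.set_size(4)               # prints 0/4 (0%)
    for _ in cb.wrap(range(4)):  # prints 1/4 (25%) ... 4/4 (100%)
        pass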
150
+ class NoOpCallback(Callback):
151
+ """
152
+ This implementation of Callback does exactly nothing
153
+ """
154
+
155
+ def call(self, *args, **kwargs):
156
+ return None
157
+
158
+
159
+ class DotPrinterCallback(Callback):
160
+ """
161
+ Simple example Callback implementation
162
+
163
+ Almost identical to Callback with a hook that prints a char; here we
164
+ demonstrate how the outer layer may print "#" and the inner layer "."
165
+ """
166
+
167
+ def __init__(self, chr_to_print="#", **kwargs):
168
+ self.chr = chr_to_print
169
+ super().__init__(**kwargs)
170
+
171
+ def branch(self, path_1, path_2, kwargs):
172
+ """Mutate kwargs to add new instance with different print char"""
173
+ kwargs["callback"] = DotPrinterCallback(".")
174
+
175
+ def call(self, **kwargs):
176
+ """Just outputs a character"""
177
+ print(self.chr, end="")
178
+
179
+
180
+ class TqdmCallback(Callback):
181
+ """
182
+ A callback to display a progress bar using tqdm
183
+
184
+ Parameters
185
+ ----------
186
+ tqdm_kwargs : dict, (optional)
187
+ Any argument accepted by the tqdm constructor.
188
+ See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
189
+ Will be forwarded to tqdm.
190
+
191
+ Examples
192
+ --------
193
+ >>> import fsspec
194
+ >>> from fsspec.callbacks import TqdmCallback
195
+ >>> fs = fsspec.filesystem("memory")
196
+ >>> path2distant_data = "/your-path"
197
+ >>> fs.upload(
198
+ ".",
199
+ path2distant_data,
200
+ recursive=True,
201
+ callback=TqdmCallback(),
202
+ )
203
+
204
+ You can forward args to tqdm using the ``tqdm_kwargs`` parameter.
205
+
206
+ >>> fs.upload(
207
+ ".",
208
+ path2distant_data,
209
+ recursive=True,
210
+ callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
211
+ )
212
+ """
213
+
214
+ def __init__(self, tqdm_kwargs=None, *args, **kwargs):
215
+ try:
216
+ import tqdm
217
+
218
+ self._tqdm = tqdm
219
+ except ImportError as exce:
220
+ raise ImportError(
221
+ "Using TqdmCallback requires tqdm to be installed"
222
+ ) from exce
223
+
224
+ self._tqdm_kwargs = tqdm_kwargs or {}
225
+ super().__init__(*args, **kwargs)
226
+
227
+ def set_size(self, size):
228
+ self.tqdm = self._tqdm.tqdm(total=size, **self._tqdm_kwargs)
229
+
230
+ def relative_update(self, inc=1):
231
+ self.tqdm.update(inc)
232
+
233
+ def __del__(self):
234
+ self.tqdm.close()
235
+ self.tqdm = None
236
+
237
+
238
+ _DEFAULT_CALLBACK = NoOpCallback()
lib/python3.11/site-packages/fsspec/compression.py ADDED
@@ -0,0 +1,174 @@
1
+ """Helper functions for a standard streaming compression API"""
2
+ from zipfile import ZipFile
3
+
4
+ import fsspec.utils
5
+ from fsspec.spec import AbstractBufferedFile
6
+
7
+
8
+ def noop_file(file, mode, **kwargs):
9
+ return file
10
+
11
+
12
+ # TODO: files should also be available as contexts
13
+ # should be functions of the form func(infile, mode=, **kwargs) -> file-like
14
+ compr = {None: noop_file}
15
+
16
+
17
+ def register_compression(name, callback, extensions, force=False):
18
+ """Register an "inferable" file compression type.
19
+
20
+ Registers transparent file compression type for use with fsspec.open.
21
+ Compression can be specified by name in open, or "infer"-ed for any files
22
+ ending with the given extensions.
23
+
24
+ Args:
25
+ name: (str) The compression type name. E.g. "gzip".
26
+ callback: A callable of form (infile, mode, **kwargs) -> file-like.
27
+ Accepts an input file-like object, the target mode and kwargs.
28
+ Returns a wrapped file-like object.
29
+ extensions: (str, Iterable[str]) A file extension, or list of file
30
+ extensions for which to infer this compression scheme. E.g. "gz".
31
+ force: (bool) Force re-registration of compression type or extensions.
32
+
33
+ Raises:
34
+ ValueError: If name or extensions already registered, and not force.
35
+
36
+ """
37
+ if isinstance(extensions, str):
38
+ extensions = [extensions]
39
+
40
+ # Validate registration
41
+ if name in compr and not force:
42
+ raise ValueError(f"Duplicate compression registration: {name}")
43
+
44
+ for ext in extensions:
45
+ if ext in fsspec.utils.compressions and not force:
46
+ raise ValueError(f"Duplicate compression file extension: {ext} ({name})")
47
+
48
+ compr[name] = callback
49
+
50
+ for ext in extensions:
51
+ fsspec.utils.compressions[ext] = name
52
+
53
+
54
+ def unzip(infile, mode="rb", filename=None, **kwargs):
55
+ if "r" not in mode:
56
+ filename = filename or "file"
57
+ z = ZipFile(infile, mode="w", **kwargs)
58
+ fo = z.open(filename, mode="w")
59
+ fo.close = lambda closer=fo.close: closer() or z.close()
60
+ return fo
61
+ z = ZipFile(infile)
62
+ if filename is None:
63
+ filename = z.namelist()[0]
64
+ return z.open(filename, mode="r", **kwargs)
65
+
66
+
67
+ register_compression("zip", unzip, "zip")
68
+
69
+ try:
70
+ from bz2 import BZ2File
71
+ except ImportError:
72
+ pass
73
+ else:
74
+ register_compression("bz2", BZ2File, "bz2")
75
+
76
+ try: # pragma: no cover
77
+ from isal import igzip
78
+
79
+ def isal(infile, mode="rb", **kwargs):
80
+ return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)
81
+
82
+ register_compression("gzip", isal, "gz")
83
+ except ImportError:
84
+ from gzip import GzipFile
85
+
86
+ register_compression(
87
+ "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
88
+ )
89
+
90
+ try:
91
+ from lzma import LZMAFile
92
+
93
+ register_compression("lzma", LZMAFile, "xz")
94
+ register_compression("xz", LZMAFile, "xz", force=True)
95
+ except ImportError:
96
+ pass
97
+
98
+ try:
99
+ import lzmaffi
100
+
101
+ register_compression("lzma", lzmaffi.LZMAFile, "xz", force=True)
102
+ register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
103
+ except ImportError:
104
+ pass
105
+
106
+
107
+ class SnappyFile(AbstractBufferedFile):
108
+ def __init__(self, infile, mode, **kwargs):
109
+ import snappy
110
+
111
+ super().__init__(
112
+ fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
113
+ )
114
+ self.infile = infile
115
+ if "r" in mode:
116
+ self.codec = snappy.StreamDecompressor()
117
+ else:
118
+ self.codec = snappy.StreamCompressor()
119
+
120
+ def _upload_chunk(self, final=False):
121
+ self.buffer.seek(0)
122
+ out = self.codec.add_chunk(self.buffer.read())
123
+ self.infile.write(out)
124
+ return True
125
+
126
+ def seek(self, loc, whence=0):
127
+ raise NotImplementedError("SnappyFile is not seekable")
128
+
129
+ def seekable(self):
130
+ return False
131
+
132
+ def _fetch_range(self, start, end):
133
+ """Get the specified set of bytes from remote"""
134
+ data = self.infile.read(end - start)
135
+ return self.codec.decompress(data)
136
+
137
+
138
+ try:
139
+ import snappy
140
+
141
+ snappy.compress
142
+ # Snappy may use the .sz file extension, but this is not part of the
143
+ # standard implementation.
144
+ register_compression("snappy", SnappyFile, [])
145
+
146
+ except (ImportError, NameError, AttributeError):
147
+ pass
148
+
149
+ try:
150
+ import lz4.frame
151
+
152
+ register_compression("lz4", lz4.frame.open, "lz4")
153
+ except ImportError:
154
+ pass
155
+
156
+ try:
157
+ import zstandard as zstd
158
+
159
+ def zstandard_file(infile, mode="rb"):
160
+ if "r" in mode:
161
+ cctx = zstd.ZstdDecompressor()
162
+ return cctx.stream_reader(infile)
163
+ else:
164
+ cctx = zstd.ZstdCompressor(level=10)
165
+ return cctx.stream_writer(infile)
166
+
167
+ register_compression("zstd", zstandard_file, "zst")
168
+ except ImportError:
169
+ pass
170
+
171
+
172
+ def available_compressions():
173
+ """Return a list of the implemented compressions."""
174
+ return list(compr)
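A hedged sketch of the registration helpers above; the codec name, extension, and pass-through callable are invented for the example and only add a toy entry to the registry.

    from fsspec.compression import available_compressions, register_compression

    # a toy pass-through "codec" registered under an invented name and extension
    register_compression("identity", lambda f, mode="rb", **kwargs: f, "ident")

    # lists None, "zip", "identity", plus whichever optional codecs imported above
    print(available_compressions())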
lib/python3.11/site-packages/fsspec/config.py ADDED
@@ -0,0 +1,131 @@
1
+ from __future__ import annotations
2
+
3
+ import configparser
4
+ import json
5
+ import os
6
+ import warnings
7
+ from typing import Any
8
+
9
+ conf: dict[str, dict[str, Any]] = {}
10
+ default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
11
+ conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)
12
+
13
+
14
+ def set_conf_env(conf_dict, envdict=os.environ):
15
+ """Set config values from environment variables
16
+
17
+ Looks for variables of the form ``FSSPEC_<protocol>`` and
18
+ ``FSSPEC_<protocol>_<kwarg>``. For ``FSSPEC_<protocol>`` the value is parsed
19
+ as a json dictionary and used to ``update`` the config of the
20
+ corresponding protocol. For ``FSSPEC_<protocol>_<kwarg>`` there is no
21
+ attempt to convert the string value, but the kwarg keys will be lower-cased.
22
+
23
+ The ``FSSPEC_<protocol>_<kwarg>`` variables are applied after the
24
+ ``FSSPEC_<protocol>`` ones.
25
+
26
+ Parameters
27
+ ----------
28
+ conf_dict : dict(str, dict)
29
+ This dict will be mutated
30
+ envdict : dict-like(str, str)
31
+ Source for the values - usually the real environment
32
+ """
33
+ kwarg_keys = []
34
+ for key in envdict:
35
+ if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_":
36
+ if key.count("_") > 1:
37
+ kwarg_keys.append(key)
38
+ continue
39
+ try:
40
+ value = json.loads(envdict[key])
41
+ except json.decoder.JSONDecodeError as ex:
42
+ warnings.warn(
43
+ f"Ignoring environment variable {key} due to a parse failure: {ex}"
44
+ )
45
+ else:
46
+ if isinstance(value, dict):
47
+ _, proto = key.split("_", 1)
48
+ conf_dict.setdefault(proto.lower(), {}).update(value)
49
+ else:
50
+ warnings.warn(
51
+ f"Ignoring environment variable {key} due to not being a dict:"
52
+ f" {type(value)}"
53
+ )
54
+ elif key.startswith("FSSPEC"):
55
+ warnings.warn(
56
+ f"Ignoring environment variable {key} due to having an unexpected name"
57
+ )
58
+
59
+ for key in kwarg_keys:
60
+ _, proto, kwarg = key.split("_", 2)
61
+ conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key]
62
+
63
+
64
+ def set_conf_files(cdir, conf_dict):
65
+ """Set config values from files
66
+
67
+ Scans for INI and JSON files in the given directory, and uses their
68
+ contents to set the config. In case of repeated values, later values
69
+ win.
70
+
71
+ In the case of INI files, all values are strings, and these will not
72
+ be converted.
73
+
74
+ Parameters
75
+ ----------
76
+ cdir : str
77
+ Directory to search
78
+ conf_dict : dict(str, dict)
79
+ This dict will be mutated
80
+ """
81
+ if not os.path.isdir(cdir):
82
+ return
83
+ allfiles = sorted(os.listdir(cdir))
84
+ for fn in allfiles:
85
+ if fn.endswith(".ini"):
86
+ ini = configparser.ConfigParser()
87
+ ini.read(os.path.join(cdir, fn))
88
+ for key in ini:
89
+ if key == "DEFAULT":
90
+ continue
91
+ conf_dict.setdefault(key, {}).update(dict(ini[key]))
92
+ if fn.endswith(".json"):
93
+ with open(os.path.join(cdir, fn)) as f:
94
+ js = json.load(f)
95
+ for key in js:
96
+ conf_dict.setdefault(key, {}).update(dict(js[key]))
97
+
98
+
99
+ def apply_config(cls, kwargs, conf_dict=None):
100
+ """Supply default values for kwargs when instantiating class
101
+
102
+ Augments the passed kwargs, by finding entries in the config dict
103
+ which match the class's ``.protocol`` attribute (one or more str)
104
+
105
+ Parameters
106
+ ----------
107
+ cls : file system implementation
108
+ kwargs : dict
109
+ conf_dict : dict of dict
110
+ Typically this is the global configuration
111
+
112
+ Returns
113
+ -------
114
+ dict : the modified set of kwargs
115
+ """
116
+ if conf_dict is None:
117
+ conf_dict = conf
118
+ protos = cls.protocol if isinstance(cls.protocol, (tuple, list)) else [cls.protocol]
119
+ kw = {}
120
+ for proto in protos:
121
+ # default kwargs from the current state of the config
122
+ if proto in conf_dict:
123
+ kw.update(conf_dict[proto])
124
+ # explicit kwargs always win
125
+ kw.update(**kwargs)
126
+ kwargs = kw
127
+ return kwargs
128
+
129
+
130
+ set_conf_files(conf_dir, conf)
131
+ set_conf_env(conf)
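A hedged sketch of the environment-variable path through ``set_conf_env``; the variable names and values are invented, and ``blocksize`` is not necessarily a real keyword of the memory filesystem.

    from fsspec.config import set_conf_env

    env = {
        "FSSPEC_MEMORY": '{"skip_instance_cache": true}',  # JSON dict for protocol "memory"
        "FSSPEC_MEMORY_BLOCKSIZE": "1048576",  # single kwarg, kept as a string
    }
    config = {}
    set_conf_env(config, envdict=env)
    print(config)  # {'memory': {'skip_instance_cache': True, 'blocksize': '1048576'}}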
lib/python3.11/site-packages/fsspec/conftest.py ADDED
@@ -0,0 +1,55 @@
1
+ import os
2
+ import shutil
3
+ import subprocess
4
+ import sys
5
+ import time
6
+
7
+ import pytest
8
+
9
+ import fsspec
10
+ from fsspec.implementations.cached import CachingFileSystem
11
+
12
+
13
+ @pytest.fixture()
14
+ def m():
15
+ """
16
+ Fixture providing a memory filesystem.
17
+ """
18
+ m = fsspec.filesystem("memory")
19
+ m.store.clear()
20
+ m.pseudo_dirs.clear()
21
+ m.pseudo_dirs.append("")
22
+ try:
23
+ yield m
24
+ finally:
25
+ m.store.clear()
26
+ m.pseudo_dirs.clear()
27
+ m.pseudo_dirs.append("")
28
+
29
+
30
+ @pytest.fixture
31
+ def ftp_writable(tmpdir):
32
+ """
33
+ Fixture providing a writable FTP filesystem.
34
+ """
35
+ pytest.importorskip("pyftpdlib")
36
+ from fsspec.implementations.ftp import FTPFileSystem
37
+
38
+ FTPFileSystem.clear_instance_cache() # remove lingering connections
39
+ CachingFileSystem.clear_instance_cache()
40
+ d = str(tmpdir)
41
+ with open(os.path.join(d, "out"), "wb") as f:
42
+ f.write(b"hello" * 10000)
43
+ P = subprocess.Popen(
44
+ [sys.executable, "-m", "pyftpdlib", "-d", d, "-u", "user", "-P", "pass", "-w"]
45
+ )
46
+ try:
47
+ time.sleep(1)
48
+ yield "localhost", 2121, "user", "pass"
49
+ finally:
50
+ P.terminate()
51
+ P.wait()
52
+ try:
53
+ shutil.rmtree(tmpdir)
54
+ except Exception:
55
+ pass
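A hedged illustration of how a test might consume the ``m`` fixture above; the test module and file contents are invented.

    # test_memory_roundtrip.py (hypothetical)
    def test_roundtrip(m):
        m.pipe_file("/hello.txt", b"hi")
        assert m.cat_file("/hello.txt") == b"hi"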
lib/python3.11/site-packages/fsspec/core.py ADDED
@@ -0,0 +1,710 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import logging
5
+ import os
6
+ import re
7
+ from glob import has_magic
8
+ from pathlib import Path
9
+
10
+ # for backwards compat, we export cache things from here too
11
+ from .caching import ( # noqa: F401
12
+ BaseCache,
13
+ BlockCache,
14
+ BytesCache,
15
+ MMapCache,
16
+ ReadAheadCache,
17
+ caches,
18
+ )
19
+ from .compression import compr
20
+ from .registry import filesystem, get_filesystem_class
21
+ from .utils import (
22
+ _unstrip_protocol,
23
+ build_name_function,
24
+ infer_compression,
25
+ stringify_path,
26
+ )
27
+
28
+ logger = logging.getLogger("fsspec")
29
+
30
+
31
+ class OpenFile:
32
+ """
33
+ File-like object to be used in a context
34
+
35
+ Can layer (buffered) text-mode and compression over any file-system, which
36
+ are typically binary-only.
37
+
38
+ These instances are safe to serialize, as the low-level file object
39
+ is not created until invoked using ``with``.
40
+
41
+ Parameters
42
+ ----------
43
+ fs: FileSystem
44
+ The file system to use for opening the file. Should be a subclass of, or
45
+ duck-type compatible with, ``fsspec.spec.AbstractFileSystem``
46
+ path: str
47
+ Location to open
48
+ mode: str like 'rb', optional
49
+ Mode of the opened file
50
+ compression: str or None, optional
51
+ Compression to apply
52
+ encoding: str or None, optional
53
+ The encoding to use if opened in text mode.
54
+ errors: str or None, optional
55
+ How to handle encoding errors if opened in text mode.
56
+ newline: None or str
57
+ Passed to TextIOWrapper in text mode, how to handle line endings.
58
+ autoopen: bool
59
+ If True, calls open() immediately. Mostly used by pickle
60
+ pos: int
61
+ If given and autoopen is True, seek to this location immediately
62
+ """
63
+
64
+ def __init__(
65
+ self,
66
+ fs,
67
+ path,
68
+ mode="rb",
69
+ compression=None,
70
+ encoding=None,
71
+ errors=None,
72
+ newline=None,
73
+ ):
74
+ self.fs = fs
75
+ self.path = path
76
+ self.mode = mode
77
+ self.compression = get_compression(path, compression)
78
+ self.encoding = encoding
79
+ self.errors = errors
80
+ self.newline = newline
81
+ self.fobjects = []
82
+
83
+ def __reduce__(self):
84
+ return (
85
+ OpenFile,
86
+ (
87
+ self.fs,
88
+ self.path,
89
+ self.mode,
90
+ self.compression,
91
+ self.encoding,
92
+ self.errors,
93
+ self.newline,
94
+ ),
95
+ )
96
+
97
+ def __repr__(self):
98
+ return f"<OpenFile '{self.path}'>"
99
+
100
+ def __enter__(self):
101
+ mode = self.mode.replace("t", "").replace("b", "") + "b"
102
+
103
+ f = self.fs.open(self.path, mode=mode)
104
+
105
+ self.fobjects = [f]
106
+
107
+ if self.compression is not None:
108
+ compress = compr[self.compression]
109
+ f = compress(f, mode=mode[0])
110
+ self.fobjects.append(f)
111
+
112
+ if "b" not in self.mode:
113
+ # assume, for example, that 'r' is equivalent to 'rt' as in builtin
114
+ f = PickleableTextIOWrapper(
115
+ f, encoding=self.encoding, errors=self.errors, newline=self.newline
116
+ )
117
+ self.fobjects.append(f)
118
+
119
+ return self.fobjects[-1]
120
+
121
+ def __exit__(self, *args):
122
+ self.close()
123
+
124
+ @property
125
+ def full_name(self):
126
+ return _unstrip_protocol(self.path, self.fs)
127
+
128
+ def open(self):
129
+ """Materialise this as a real open file without context
130
+
131
+ The OpenFile object should be explicitly closed to avoid enclosed file
132
+ instances persisting. You must, therefore, keep a reference to the OpenFile
133
+ during the life of the file-like it generates.
134
+ """
135
+ return self.__enter__()
136
+
137
+ def close(self):
138
+ """Close all encapsulated file objects"""
139
+ for f in reversed(self.fobjects):
140
+ if "r" not in self.mode and not f.closed:
141
+ f.flush()
142
+ f.close()
143
+ self.fobjects.clear()
144
+
145
+
146
+ class OpenFiles(list):
147
+ """List of OpenFile instances
148
+
149
+ Can be used in a single context, which opens and closes all of the
150
+ contained files. Normal list access to get the elements works as
151
+ normal.
152
+
153
+ A special case is made for caching filesystems - the files will
154
+ be down/uploaded together at the start or end of the context, and
155
+ this may happen concurrently, if the target filesystem supports it.
156
+ """
157
+
158
+ def __init__(self, *args, mode="rb", fs=None):
159
+ self.mode = mode
160
+ self.fs = fs
161
+ self.files = []
162
+ super().__init__(*args)
163
+
164
+ def __enter__(self):
165
+ if self.fs is None:
166
+ raise ValueError("Context has already been used")
167
+
168
+ fs = self.fs
169
+ while True:
170
+ if hasattr(fs, "open_many"):
171
+ # check for concurrent cache download; or set up for upload
172
+ self.files = fs.open_many(self)
173
+ return self.files
174
+ if hasattr(fs, "fs") and fs.fs is not None:
175
+ fs = fs.fs
176
+ else:
177
+ break
178
+ return [s.__enter__() for s in self]
179
+
180
+ def __exit__(self, *args):
181
+ fs = self.fs
182
+ [s.__exit__(*args) for s in self]
183
+ if "r" not in self.mode:
184
+ while True:
185
+ if hasattr(fs, "open_many"):
186
+ # check for concurrent cache upload
187
+ fs.commit_many(self.files)
188
+ return
189
+ if hasattr(fs, "fs") and fs.fs is not None:
190
+ fs = fs.fs
191
+ else:
192
+ break
193
+
194
+ def __getitem__(self, item):
195
+ out = super().__getitem__(item)
196
+ if isinstance(item, slice):
197
+ return OpenFiles(out, mode=self.mode, fs=self.fs)
198
+ return out
199
+
200
+ def __repr__(self):
201
+ return f"<List of {len(self)} OpenFile instances>"
202
+
203
+
204
+ def open_files(
205
+ urlpath,
206
+ mode="rb",
207
+ compression=None,
208
+ encoding="utf8",
209
+ errors=None,
210
+ name_function=None,
211
+ num=1,
212
+ protocol=None,
213
+ newline=None,
214
+ auto_mkdir=True,
215
+ expand=True,
216
+ **kwargs,
217
+ ):
218
+ """Given a path or paths, return a list of ``OpenFile`` objects.
219
+
220
+ For writing, a str path must contain the "*" character, which will be filled
221
+ in by increasing numbers, e.g., "part*" -> "part0", "part1" if num=2.
222
+
223
+ For either reading or writing, can instead provide explicit list of paths.
224
+
225
+ Parameters
226
+ ----------
227
+ urlpath: string or list
228
+ Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
229
+ to read from alternative filesystems. To read from multiple files you
230
+ can pass a globstring or a list of paths, with the caveat that they
231
+ must all have the same protocol.
232
+ mode: 'rb', 'wt', etc.
233
+ compression: string or None
234
+ If given, open file using compression codec. Can either be a compression
235
+ name (a key in ``fsspec.compression.compr``) or "infer" to guess the
236
+ compression from the filename suffix.
237
+ encoding: str
238
+ For text mode only
239
+ errors: None or str
240
+ Passed to TextIOWrapper in text mode
241
+ name_function: function or None
242
+ if opening a set of files for writing, those files do not yet exist,
243
+ so we need to generate their names by formatting the urlpath for
244
+ each sequence number
245
+ num: int [1]
246
+ if writing mode, number of files we expect to create (passed to
247
+ name_function)
248
+ protocol: str or None
249
+ If given, overrides the protocol found in the URL.
250
+ newline: bytes or None
251
+ Used for line terminator in text mode. If None, uses system default;
252
+ if blank, uses no translation.
253
+ auto_mkdir: bool (True)
254
+ If in write mode, this will ensure the target directory exists before
255
+ writing, by calling ``fs.mkdirs(exist_ok=True)``.
256
+ expand: bool
257
+ **kwargs: dict
258
+ Extra options that make sense to a particular storage connection, e.g.
259
+ host, port, username, password, etc.
260
+
261
+ Examples
262
+ --------
263
+ >>> files = open_files('2015-*-*.csv') # doctest: +SKIP
264
+ >>> files = open_files(
265
+ ... 's3://bucket/2015-*-*.csv.gz', compression='gzip'
266
+ ... ) # doctest: +SKIP
267
+
268
+ Returns
269
+ -------
270
+ An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
271
+ be used as a single context
272
+
273
+ Notes
274
+ -----
275
+ For a full list of the available protocols and the implementations that
276
+ they map across to see the latest online documentation:
277
+
278
+ - For implementations built into ``fsspec`` see
279
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
280
+ - For implementations in separate packages see
281
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
282
+ """
283
+ fs, fs_token, paths = get_fs_token_paths(
284
+ urlpath,
285
+ mode,
286
+ num=num,
287
+ name_function=name_function,
288
+ storage_options=kwargs,
289
+ protocol=protocol,
290
+ expand=expand,
291
+ )
292
+ if fs.protocol == "file":
293
+ fs.auto_mkdir = auto_mkdir
294
+ elif "r" not in mode and auto_mkdir:
295
+ parents = {fs._parent(path) for path in paths}
296
+ for parent in parents:
297
+ try:
298
+ fs.makedirs(parent, exist_ok=True)
299
+ except PermissionError:
300
+ pass
301
+ return OpenFiles(
302
+ [
303
+ OpenFile(
304
+ fs,
305
+ path,
306
+ mode=mode,
307
+ compression=compression,
308
+ encoding=encoding,
309
+ errors=errors,
310
+ newline=newline,
311
+ )
312
+ for path in paths
313
+ ],
314
+ mode=mode,
315
+ fs=fs,
316
+ )
317
+
318
+
319
+ def _un_chain(path, kwargs):
320
+ x = re.compile(".*[^a-z]+.*") # test for non protocol-like single word
321
+ bits = (
322
+ [p if "://" in p or x.match(p) else p + "://" for p in path.split("::")]
323
+ if "::" in path
324
+ else [path]
325
+ )
326
+ # [[url, protocol, kwargs], ...]
327
+ out = []
328
+ previous_bit = None
329
+ kwargs = kwargs.copy()
330
+ for bit in reversed(bits):
331
+ protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
332
+ cls = get_filesystem_class(protocol)
333
+ extra_kwargs = cls._get_kwargs_from_urls(bit)
334
+ kws = kwargs.pop(protocol, {})
335
+ if bit is bits[0]:
336
+ kws.update(kwargs)
337
+ kw = dict(**extra_kwargs, **kws)
338
+ bit = cls._strip_protocol(bit)
339
+ if (
340
+ protocol in {"blockcache", "filecache", "simplecache"}
341
+ and "target_protocol" not in kw
342
+ ):
343
+ bit = previous_bit
344
+ out.append((bit, protocol, kw))
345
+ previous_bit = bit
346
+ out = list(reversed(out))
347
+ return out
348
+
349
+
350
+ def url_to_fs(url, **kwargs):
351
+ """
352
+ Turn fully-qualified and potentially chained URL into filesystem instance
353
+
354
+ Parameters
355
+ ----------
356
+ url : str
357
+ The fsspec-compatible URL
358
+ **kwargs: dict
359
+ Extra options that make sense to a particular storage connection, e.g.
360
+ host, port, username, password, etc.
361
+
362
+ Returns
363
+ -------
364
+ filesystem : FileSystem
365
+ The new filesystem discovered from ``url`` and created with
366
+ ``**kwargs``.
367
+ urlpath : str
368
+ The file-systems-specific URL for ``url``.
369
+ """
370
+ # non-FS arguments that appear in fsspec.open()
371
+ # inspect could keep this in sync with open()'s signature
372
+ known_kwargs = {
373
+ "compression",
374
+ "encoding",
375
+ "errors",
376
+ "expand",
377
+ "mode",
378
+ "name_function",
379
+ "newline",
380
+ "num",
381
+ }
382
+ kwargs = {k: v for k, v in kwargs.items() if k not in known_kwargs}
383
+ chain = _un_chain(url, kwargs)
384
+ inkwargs = {}
385
+ # Reverse iterate the chain, creating a nested target_* structure
386
+ for i, ch in enumerate(reversed(chain)):
387
+ urls, protocol, kw = ch
388
+ if i == len(chain) - 1:
389
+ inkwargs = dict(**kw, **inkwargs)
390
+ continue
391
+ inkwargs["target_options"] = dict(**kw, **inkwargs)
392
+ inkwargs["target_protocol"] = protocol
393
+ inkwargs["fo"] = urls
394
+ urlpath, protocol, _ = chain[0]
395
+ fs = filesystem(protocol, **inkwargs)
396
+ return fs, urlpath
397
+
398
+
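A hedged sketch of ``url_to_fs`` on a plain and a chained URL; the paths are invented, and the chained form assumes the ``simplecache`` implementation is available.

    from fsspec.core import url_to_fs

    fs, path = url_to_fs("memory://bucket/data.csv")
    # fs is the memory filesystem instance; path == "/bucket/data.csv"

    # chained URL: simplecache wraps the memory filesystem as its target
    cached_fs, cached_path = url_to_fs("simplecache::memory://bucket/data.csv")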
399
+ def open(
400
+ urlpath,
401
+ mode="rb",
402
+ compression=None,
403
+ encoding="utf8",
404
+ errors=None,
405
+ protocol=None,
406
+ newline=None,
407
+ **kwargs,
408
+ ):
409
+ """Given a path or paths, return one ``OpenFile`` object.
410
+
411
+ Parameters
412
+ ----------
413
+ urlpath: string or list
414
+ Absolute or relative filepath. Prefix with a protocol like ``s3://``
415
+ to read from alternative filesystems. Should not include glob
416
+ character(s).
417
+ mode: 'rb', 'wt', etc.
418
+ compression: string or None
419
+ If given, open file using compression codec. Can either be a compression
420
+ name (a key in ``fsspec.compression.compr``) or "infer" to guess the
421
+ compression from the filename suffix.
422
+ encoding: str
423
+ For text mode only
424
+ errors: None or str
425
+ Passed to TextIOWrapper in text mode
426
+ protocol: str or None
427
+ If given, overrides the protocol found in the URL.
428
+ newline: bytes or None
429
+ Used for line terminator in text mode. If None, uses system default;
430
+ if blank, uses no translation.
431
+ **kwargs: dict
432
+ Extra options that make sense to a particular storage connection, e.g.
433
+ host, port, username, password, etc.
434
+
435
+ Examples
436
+ --------
437
+ >>> openfile = open('2015-01-01.csv') # doctest: +SKIP
438
+ >>> openfile = open(
439
+ ... 's3://bucket/2015-01-01.csv.gz', compression='gzip'
440
+ ... ) # doctest: +SKIP
441
+ >>> with openfile as f:
442
+ ... df = pd.read_csv(f) # doctest: +SKIP
443
+ ...
444
+
445
+ Returns
446
+ -------
447
+ ``OpenFile`` object.
448
+
449
+ Notes
450
+ -----
451
+ For a full list of the available protocols and the implementations that
452
+ they map across to, see the latest online documentation:
453
+
454
+ - For implementations built into ``fsspec`` see
455
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
456
+ - For implementations in separate packages see
457
+ https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
458
+ """
459
+ out = open_files(
460
+ urlpath=[urlpath],
461
+ mode=mode,
462
+ compression=compression,
463
+ encoding=encoding,
464
+ errors=errors,
465
+ protocol=protocol,
466
+ newline=newline,
467
+ expand=False,
468
+ **kwargs,
469
+ )
470
+ if not out:
471
+ raise FileNotFoundError(urlpath)
472
+ return out[0]
473
+
474
+
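A hedged sketch of the single-file helper above, combining text mode with inferred compression; the ``memory://`` path is invented for the example.

    import fsspec

    # write, then read back, a gzip-compressed text file on the memory filesystem
    with fsspec.open("memory://logs/run1.txt.gz", "wt", compression="infer") as f:
        f.write("line one\nline two\n")

    with fsspec.open("memory://logs/run1.txt.gz", "rt", compression="infer") as f:
        for line in f:
            print(line.rstrip())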
475
+ def open_local(
476
+ url: str | list[str] | Path | list[Path],
477
+ mode: str = "rb",
478
+ **storage_options: dict,
479
+ ) -> str | list[str]:
480
+ """Open file(s) which can be resolved to local
481
+
482
+ For files which either are local, or get downloaded upon open
483
+ (e.g., by file caching)
484
+
485
+ Parameters
486
+ ----------
487
+ url: str or list(str)
488
+ mode: str
489
+ Must be read mode
490
+ storage_options:
491
+ passed on to FS for or used by open_files (e.g., compression)
492
+ """
493
+ if "r" not in mode:
494
+ raise ValueError("Can only ensure local files when reading")
495
+ of = open_files(url, mode=mode, **storage_options)
496
+ if not getattr(of[0].fs, "local_file", False):
497
+ raise ValueError(
498
+ "open_local can only be used on a filesystem which"
499
+ " has attribute local_file=True"
500
+ )
501
+ with of as files:
502
+ paths = [f.name for f in files]
503
+ if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
504
+ return paths[0]
505
+ return paths
506
+
507
+
508
+ def get_compression(urlpath, compression):
509
+ if compression == "infer":
510
+ compression = infer_compression(urlpath)
511
+ if compression is not None and compression not in compr:
512
+ raise ValueError(f"Compression type {compression} not supported")
513
+ return compression
514
+
515
+
516
+ def split_protocol(urlpath):
517
+ """Return protocol, path pair"""
518
+ urlpath = stringify_path(urlpath)
519
+ if "://" in urlpath:
520
+ protocol, path = urlpath.split("://", 1)
521
+ if len(protocol) > 1:
522
+ # excludes Windows paths
523
+ return protocol, path
524
+ if urlpath.startswith("data:"):
525
+ return urlpath.split(":", 1)
526
+ return None, urlpath
527
+
528
+
529
+ def strip_protocol(urlpath):
530
+ """Return only path part of full URL, according to appropriate backend"""
531
+ protocol, _ = split_protocol(urlpath)
532
+ cls = get_filesystem_class(protocol)
533
+ return cls._strip_protocol(urlpath)
534
+
535
+
536
+ def expand_paths_if_needed(paths, mode, num, fs, name_function):
537
+ """Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
538
+ in them (read mode).
539
+
540
+ :param paths: list of paths
541
+ mode: str
542
+ Mode in which to open files.
543
+ num: int
544
+ If opening in writing mode, number of files we expect to create.
545
+ fs: filesystem object
546
+ name_function: callable
547
+ If opening in writing mode, this callable is used to generate path
548
+ names. Names are generated for each partition by
549
+ ``urlpath.replace('*', name_function(partition_index))``.
550
+ :return: list of paths
551
+ """
552
+ expanded_paths = []
553
+ paths = list(paths)
554
+
555
+ if "w" in mode: # write mode
556
+ if sum([1 for p in paths if "*" in p]) > 1:
557
+ raise ValueError(
558
+ "When writing data, only one filename mask can be specified."
559
+ )
560
+ num = max(num, len(paths))
561
+
562
+ for curr_path in paths:
563
+ if "*" in curr_path:
564
+ # expand using name_function
565
+ expanded_paths.extend(_expand_paths(curr_path, name_function, num))
566
+ else:
567
+ expanded_paths.append(curr_path)
568
+ # if we generated more paths than asked for, trim the list
569
+ if len(expanded_paths) > num:
570
+ expanded_paths = expanded_paths[:num]
571
+
572
+ else: # read mode
573
+ for curr_path in paths:
574
+ if has_magic(curr_path):
575
+ # expand using glob
576
+ expanded_paths.extend(fs.glob(curr_path))
577
+ else:
578
+ expanded_paths.append(curr_path)
579
+
580
+ return expanded_paths
581
+
582
+
583
+ def get_fs_token_paths(
584
+ urlpath,
585
+ mode="rb",
586
+ num=1,
587
+ name_function=None,
588
+ storage_options=None,
589
+ protocol=None,
590
+ expand=True,
591
+ ):
592
+ """Filesystem, deterministic token, and paths from a urlpath and options.
593
+
594
+ Parameters
595
+ ----------
596
+ urlpath: string or iterable
597
+ Absolute or relative filepath, URL (may include protocols like
598
+ ``s3://``), or globstring pointing to data.
599
+ mode: str, optional
600
+ Mode in which to open files.
601
+ num: int, optional
602
+ If opening in writing mode, number of files we expect to create.
603
+ name_function: callable, optional
604
+ If opening in writing mode, this callable is used to generate path
605
+ names. Names are generated for each partition by
606
+ ``urlpath.replace('*', name_function(partition_index))``.
607
+ storage_options: dict, optional
608
+ Additional keywords to pass to the filesystem class.
609
+ protocol: str or None
610
+ To override the protocol specifier in the URL
611
+ expand: bool
612
+ Expand string paths for writing, assuming the path is a directory
613
+ """
614
+ if isinstance(urlpath, (list, tuple, set)):
615
+ if not urlpath:
616
+ raise ValueError("empty urlpath sequence")
617
+ urlpath0 = stringify_path(list(urlpath)[0])
618
+ else:
619
+ urlpath0 = stringify_path(urlpath)
620
+ storage_options = storage_options or {}
621
+ if protocol:
622
+ storage_options["protocol"] = protocol
623
+ chain = _un_chain(urlpath0, storage_options or {})
624
+ inkwargs = {}
625
+ # Reverse iterate the chain, creating a nested target_* structure
626
+ for i, ch in enumerate(reversed(chain)):
627
+ urls, nested_protocol, kw = ch
628
+ if i == len(chain) - 1:
629
+ inkwargs = dict(**kw, **inkwargs)
630
+ continue
631
+ inkwargs["target_options"] = dict(**kw, **inkwargs)
632
+ inkwargs["target_protocol"] = nested_protocol
633
+ inkwargs["fo"] = urls
634
+ paths, protocol, _ = chain[0]
635
+ fs = filesystem(protocol, **inkwargs)
636
+ if isinstance(urlpath, (list, tuple, set)):
637
+ pchains = [
638
+ _un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath
639
+ ]
640
+ if len({pc[1] for pc in pchains}) > 1:
641
+ raise ValueError(f"Protocol mismatch getting fs from {urlpath}")
642
+ paths = [pc[0] for pc in pchains]
643
+ else:
644
+ paths = fs._strip_protocol(paths)
645
+ if isinstance(paths, (list, tuple, set)):
646
+ paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
647
+ else:
648
+ if "w" in mode and expand:
649
+ paths = _expand_paths(paths, name_function, num)
650
+ elif "x" in mode and expand:
651
+ paths = _expand_paths(paths, name_function, num)
652
+ elif "*" in paths:
653
+ paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
654
+ else:
655
+ paths = [paths]
656
+
657
+ return fs, fs._fs_token, paths
658
+
659
+
660
+ def _expand_paths(path, name_function, num):
661
+ if isinstance(path, str):
662
+ if path.count("*") > 1:
663
+ raise ValueError("Output path spec must contain exactly one '*'.")
664
+ elif "*" not in path:
665
+ path = os.path.join(path, "*.part")
666
+
667
+ if name_function is None:
668
+ name_function = build_name_function(num - 1)
669
+
670
+ paths = [path.replace("*", name_function(i)) for i in range(num)]
671
+ if paths != sorted(paths):
672
+ logger.warning(
673
+ "In order to preserve order between partitions"
674
+ " paths created with ``name_function`` should "
675
+ "sort to partition order"
676
+ )
677
+ elif isinstance(path, (tuple, list)):
678
+ assert len(path) == num
679
+ paths = list(path)
680
+ else:
681
+ raise ValueError(
682
+ "Path should be either\n"
683
+ "1. A list of paths: ['foo.json', 'bar.json', ...]\n"
684
+ "2. A directory: 'foo/'\n"
685
+ "3. A path with a '*' in it: 'foo.*.json'"
686
+ )
687
+ return paths
688
+
689
+
690
+ class PickleableTextIOWrapper(io.TextIOWrapper):
691
+ """TextIOWrapper cannot be pickled. This solves it.
692
+
693
+ Requires that ``buffer`` be pickleable, which all instances of
694
+ AbstractBufferedFile are.
695
+ """
696
+
697
+ def __init__(
698
+ self,
699
+ buffer,
700
+ encoding=None,
701
+ errors=None,
702
+ newline=None,
703
+ line_buffering=False,
704
+ write_through=False,
705
+ ):
706
+ self.args = buffer, encoding, errors, newline, line_buffering, write_through
707
+ super().__init__(*self.args)
708
+
709
+ def __reduce__(self):
710
+ return PickleableTextIOWrapper, self.args
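A hedged sketch of the serialization property claimed in the ``OpenFile`` docstring: the instance can be pickled before any low-level file is opened. The memory filesystem and path are invented for the example.

    import pickle

    import fsspec
    from fsspec.core import OpenFile

    of = OpenFile(fsspec.filesystem("memory"), "/nums.txt", mode="wt")
    restored = pickle.loads(pickle.dumps(of))  # round-trips via __reduce__
    with restored as f:
        f.write("1\n2\n3\n")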
lib/python3.11/site-packages/fsspec/dircache.py ADDED
@@ -0,0 +1,98 @@
1
+ import time
2
+ from collections.abc import MutableMapping
3
+ from functools import lru_cache
4
+
5
+
6
+ class DirCache(MutableMapping):
7
+ """
8
+ Caching of directory listings, in a structure like::
9
+
10
+ {"path0": [
11
+ {"name": "path0/file0",
12
+ "size": 123,
13
+ "type": "file",
14
+ ...
15
+ },
16
+ {"name": "path0/file1",
17
+ },
18
+ ...
19
+ ],
20
+ "path1": [...]
21
+ }
22
+
23
+ Parameters to this class control listing expiry or indeed turn
24
+ caching off
25
+ """
26
+
27
+ def __init__(
28
+ self,
29
+ use_listings_cache=True,
30
+ listings_expiry_time=None,
31
+ max_paths=None,
32
+ **kwargs,
33
+ ):
34
+ """
35
+
36
+ Parameters
37
+ ----------
38
+ use_listings_cache: bool
39
+ If False, this cache never returns items, but always reports KeyError,
40
+ and setting items has no effect
41
+ listings_expiry_time: int or float (optional)
42
+ Time in seconds that a listing is considered valid. If None,
43
+ listings do not expire.
44
+ max_paths: int (optional)
45
+ The number of most recent listings that are considered valid; 'recent'
46
+ refers to when the entry was set.
47
+ """
48
+ self._cache = {}
49
+ self._times = {}
50
+ if max_paths:
51
+ self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None))
52
+ self.use_listings_cache = use_listings_cache
53
+ self.listings_expiry_time = listings_expiry_time
54
+ self.max_paths = max_paths
55
+
56
+ def __getitem__(self, item):
57
+ if self.listings_expiry_time is not None:
58
+ if self._times.get(item, 0) - time.time() < -self.listings_expiry_time:
59
+ del self._cache[item]
60
+ if self.max_paths:
61
+ self._q(item)
62
+ return self._cache[item] # maybe raises KeyError
63
+
64
+ def clear(self):
65
+ self._cache.clear()
66
+
67
+ def __len__(self):
68
+ return len(self._cache)
69
+
70
+ def __contains__(self, item):
71
+ try:
72
+ self[item]
73
+ return True
74
+ except KeyError:
75
+ return False
76
+
77
+ def __setitem__(self, key, value):
78
+ if not self.use_listings_cache:
79
+ return
80
+ if self.max_paths:
81
+ self._q(key)
82
+ self._cache[key] = value
83
+ if self.listings_expiry_time is not None:
84
+ self._times[key] = time.time()
85
+
86
+ def __delitem__(self, key):
87
+ del self._cache[key]
88
+
89
+ def __iter__(self):
90
+ entries = list(self._cache)
91
+
92
+ return (k for k in entries if k in self)
93
+
94
+ def __reduce__(self):
95
+ return (
96
+ DirCache,
97
+ (self.use_listings_cache, self.listings_expiry_time, self.max_paths),
98
+ )
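A hedged sketch of using ``DirCache`` directly; in practice filesystems populate it internally, and the listing entries below are invented.

    from fsspec.dircache import DirCache

    cache = DirCache(listings_expiry_time=60, max_paths=1000)
    cache["/bucket/prefix"] = [
        {"name": "/bucket/prefix/a.csv", "size": 10, "type": "file"},
    ]
    if "/bucket/prefix" in cache:  # honours expiry and the recent-paths limit
        listing = cache["/bucket/prefix"]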
lib/python3.11/site-packages/fsspec/exceptions.py ADDED
@@ -0,0 +1,21 @@
1
+ """
2
+ fsspec user-defined exception classes
3
+ """
4
+ import asyncio
5
+
6
+
7
+ class BlocksizeMismatchError(ValueError):
8
+ """
9
+ Raised when a cached file is opened with a different blocksize than it was
10
+ written with
11
+ """
12
+
13
+ ...
14
+
15
+
16
+ class FSTimeoutError(asyncio.TimeoutError):
17
+ """
18
+ Raised when an fsspec operation times out
19
+ """
20
+
21
+ ...
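A hedged note in code form: ``FSTimeoutError`` subclasses ``asyncio.TimeoutError``, so callers can catch it under either name; the raise below is purely illustrative.

    import asyncio

    from fsspec.exceptions import FSTimeoutError

    try:
        raise FSTimeoutError("illustrative timeout")
    except asyncio.TimeoutError:
        print("caught as asyncio.TimeoutError")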