6d3bea4b268fcd6555b43f7754e3b8a3bbf7a5122b1e4f66294f0383b953f763
- lib/python3.11/site-packages/filelock-3.13.1.dist-info/INSTALLER +1 -0
- lib/python3.11/site-packages/filelock-3.13.1.dist-info/METADATA +56 -0
- lib/python3.11/site-packages/filelock-3.13.1.dist-info/RECORD +22 -0
- lib/python3.11/site-packages/filelock-3.13.1.dist-info/WHEEL +4 -0
- lib/python3.11/site-packages/filelock-3.13.1.dist-info/licenses/LICENSE +24 -0
- lib/python3.11/site-packages/filelock/__pycache__/_unix.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/filelock/__pycache__/_util.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/filelock/__pycache__/_windows.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/filelock/__pycache__/version.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/filelock/_api.py +323 -0
- lib/python3.11/site-packages/filelock/_error.py +30 -0
- lib/python3.11/site-packages/filelock/_soft.py +47 -0
- lib/python3.11/site-packages/filelock/_unix.py +65 -0
- lib/python3.11/site-packages/filelock/_util.py +47 -0
- lib/python3.11/site-packages/filelock/_windows.py +65 -0
- lib/python3.11/site-packages/filelock/py.typed +0 -0
- lib/python3.11/site-packages/filelock/version.py +16 -0
- lib/python3.11/site-packages/fsspec/__init__.py +69 -0
- lib/python3.11/site-packages/fsspec/__pycache__/__init__.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/caching.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/config.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/conftest.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/dircache.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/exceptions.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/fuse.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/generic.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/mapping.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/parquet.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/registry.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/spec.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/__pycache__/utils.cpython-311.pyc +0 -0
- lib/python3.11/site-packages/fsspec/_version.py +21 -0
- lib/python3.11/site-packages/fsspec/archive.py +73 -0
- lib/python3.11/site-packages/fsspec/asyn.py +1081 -0
- lib/python3.11/site-packages/fsspec/caching.py +875 -0
- lib/python3.11/site-packages/fsspec/callbacks.py +238 -0
- lib/python3.11/site-packages/fsspec/compression.py +174 -0
- lib/python3.11/site-packages/fsspec/config.py +131 -0
- lib/python3.11/site-packages/fsspec/conftest.py +55 -0
- lib/python3.11/site-packages/fsspec/core.py +710 -0
- lib/python3.11/site-packages/fsspec/dircache.py +98 -0
- lib/python3.11/site-packages/fsspec/exceptions.py +21 -0
lib/python3.11/site-packages/filelock-3.13.1.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+pip
lib/python3.11/site-packages/filelock-3.13.1.dist-info/METADATA
ADDED
@@ -0,0 +1,56 @@
+Metadata-Version: 2.1
+Name: filelock
+Version: 3.13.1
+Summary: A platform independent file lock.
+Project-URL: Documentation, https://py-filelock.readthedocs.io
+Project-URL: Homepage, https://github.com/tox-dev/py-filelock
+Project-URL: Source, https://github.com/tox-dev/py-filelock
+Project-URL: Tracker, https://github.com/tox-dev/py-filelock/issues
+Maintainer-email: Bernát Gábor <gaborjbernat@gmail.com>
+License-Expression: Unlicense
+License-File: LICENSE
+Keywords: application,cache,directory,log,user
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: The Unlicense (Unlicense)
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Internet
+Classifier: Topic :: Software Development :: Libraries
+Classifier: Topic :: System
+Requires-Python: >=3.8
+Provides-Extra: docs
+Requires-Dist: furo>=2023.9.10; extra == 'docs'
+Requires-Dist: sphinx-autodoc-typehints!=1.23.4,>=1.24; extra == 'docs'
+Requires-Dist: sphinx>=7.2.6; extra == 'docs'
+Provides-Extra: testing
+Requires-Dist: covdefaults>=2.3; extra == 'testing'
+Requires-Dist: coverage>=7.3.2; extra == 'testing'
+Requires-Dist: diff-cover>=8; extra == 'testing'
+Requires-Dist: pytest-cov>=4.1; extra == 'testing'
+Requires-Dist: pytest-mock>=3.12; extra == 'testing'
+Requires-Dist: pytest-timeout>=2.2; extra == 'testing'
+Requires-Dist: pytest>=7.4.3; extra == 'testing'
+Provides-Extra: typing
+Requires-Dist: typing-extensions>=4.8; python_version < '3.11' and extra == 'typing'
+Description-Content-Type: text/markdown
+
+# filelock
+
+[![PyPI](https://img.shields.io/pypi/v/filelock)](https://pypi.org/project/filelock/)
+[![Supported Python
+versions](https://img.shields.io/pypi/pyversions/filelock.svg)](https://pypi.org/project/filelock/)
+[![Documentation
+status](https://readthedocs.org/projects/py-filelock/badge/?version=latest)](https://py-filelock.readthedocs.io/en/latest/?badge=latest)
+[![Code style:
+black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![Downloads](https://static.pepy.tech/badge/filelock/month)](https://pepy.tech/project/filelock)
+[![check](https://github.com/tox-dev/py-filelock/actions/workflows/check.yml/badge.svg)](https://github.com/tox-dev/py-filelock/actions/workflows/check.yml)
+
+For more information checkout the [official documentation](https://py-filelock.readthedocs.io/en/latest/index.html).
lib/python3.11/site-packages/filelock-3.13.1.dist-info/RECORD
ADDED
@@ -0,0 +1,22 @@
+filelock-3.13.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+filelock-3.13.1.dist-info/METADATA,sha256=gi7LyG-dEuOBZC32wie-OOG0OkPZHABsn9rXvxuQlcA,2784
+filelock-3.13.1.dist-info/RECORD,,
+filelock-3.13.1.dist-info/WHEEL,sha256=9QBuHhg6FNW7lppboF2vKVbCGTVzsFykgRQjjlajrhA,87
+filelock-3.13.1.dist-info/licenses/LICENSE,sha256=iNm062BXnBkew5HKBMFhMFctfu3EqG2qWL8oxuFMm80,1210
+filelock/__init__.py,sha256=wAVZ_9_-3Y14xzzupRk5BTTRewFJekR2vf9oIx4M750,1213
+filelock/__pycache__/__init__.cpython-311.pyc,,
+filelock/__pycache__/_api.cpython-311.pyc,,
+filelock/__pycache__/_error.cpython-311.pyc,,
+filelock/__pycache__/_soft.cpython-311.pyc,,
+filelock/__pycache__/_unix.cpython-311.pyc,,
+filelock/__pycache__/_util.cpython-311.pyc,,
+filelock/__pycache__/_windows.cpython-311.pyc,,
+filelock/__pycache__/version.cpython-311.pyc,,
+filelock/_api.py,sha256=UsVWPEOOgFH1pR_6WMk2b5hWZ7nWhUPT5GZX9WuYaC8,11860
+filelock/_error.py,sha256=-5jMcjTu60YAvAO1UbqDD1GIEjVkwr8xCFwDBtMeYDg,787
+filelock/_soft.py,sha256=haqtc_TB_KJbYv2a8iuEAclKuM4fMG1vTcp28sK919c,1711
+filelock/_unix.py,sha256=ViG38PgJsIhT3xaArugvw0TPP6VWoP2VJj7FEIWypkg,2157
+filelock/_util.py,sha256=dBDlIj1dHL_juXX0Qqq6bZtyE53YZTN8GFhtyTV043o,1708
+filelock/_windows.py,sha256=eMKL8dZKrgekf5VYVGR14an29JGEInRtUO8ui9ABywg,2177
+filelock/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+filelock/version.py,sha256=fmajg3X8ZdOn-UpUewARwK5cfYf4wP4Xa0DcHjigFYo,413
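Each RECORD row above is `path,sha256=<digest>,<size>`, where the digest is the unpadded URL-safe base64 encoding of the file's sha256 and the size is in bytes. A minimal sketch of recomputing one entry to verify an installed file; the path below is illustrative and the helper name is my own:

import base64
import hashlib
from pathlib import Path

def record_entry(path: Path) -> str:
    """Format a wheel RECORD line for *path* (unpadded urlsafe-b64 sha256)."""
    data = path.read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

# Illustrative location; point this at your own site-packages.
print(record_entry(Path("lib/python3.11/site-packages/filelock/py.typed")))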
lib/python3.11/site-packages/filelock-3.13.1.dist-info/WHEEL
ADDED
@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.18.0
+Root-Is-Purelib: true
+Tag: py3-none-any
lib/python3.11/site-packages/filelock-3.13.1.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org>
lib/python3.11/site-packages/filelock/__pycache__/_unix.cpython-311.pyc
ADDED
Binary file (3.6 kB)

lib/python3.11/site-packages/filelock/__pycache__/_util.cpython-311.pyc
ADDED
Binary file (2.21 kB)

lib/python3.11/site-packages/filelock/__pycache__/_windows.cpython-311.pyc
ADDED
Binary file (3.69 kB)

lib/python3.11/site-packages/filelock/__pycache__/version.cpython-311.pyc
ADDED
Binary file (681 Bytes)
lib/python3.11/site-packages/filelock/_api.py
ADDED
@@ -0,0 +1,323 @@
+from __future__ import annotations
+
+import contextlib
+import logging
+import os
+import time
+import warnings
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from threading import local
+from typing import TYPE_CHECKING, Any, ClassVar
+from weakref import WeakValueDictionary
+
+from ._error import Timeout
+
+if TYPE_CHECKING:
+    import sys
+    from types import TracebackType
+
+    if sys.version_info >= (3, 11):  # pragma: no cover (py311+)
+        from typing import Self
+    else:  # pragma: no cover (<py311)
+        from typing_extensions import Self
+
+
+_LOGGER = logging.getLogger("filelock")
+
+
+# This is a helper class which is returned by :meth:`BaseFileLock.acquire` and wraps the lock to make sure __enter__
+# is not called twice when entering the with statement. If we would simply return *self*, the lock would be acquired
+# again in the *__enter__* method of the BaseFileLock, but not released again automatically. issue #37 (memory leak)
+class AcquireReturnProxy:
+    """A context aware object that will release the lock file when exiting."""
+
+    def __init__(self, lock: BaseFileLock) -> None:
+        self.lock = lock
+
+    def __enter__(self) -> BaseFileLock:
+        return self.lock
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        self.lock.release()
+
+
+@dataclass
+class FileLockContext:
+    """A dataclass which holds the context for a ``BaseFileLock`` object."""
+
+    # The context is held in a separate class to allow optional use of thread local storage via the
+    # ThreadLocalFileContext class.
+
+    #: The path to the lock file.
+    lock_file: str
+
+    #: The default timeout value.
+    timeout: float
+
+    #: The mode for the lock files
+    mode: int
+
+    #: The file descriptor for the *_lock_file* as it is returned by the os.open() function, not None when lock held
+    lock_file_fd: int | None = None
+
+    #: The lock counter is used for implementing the nested locking mechanism.
+    lock_counter: int = 0  # When the lock is acquired is increased and the lock is only released, when this value is 0
+
+
+class ThreadLocalFileContext(FileLockContext, local):
+    """A thread local version of the ``FileLockContext`` class."""
+
+
+class BaseFileLock(ABC, contextlib.ContextDecorator):
+    """Abstract base class for a file lock object."""
+
+    _instances: ClassVar[WeakValueDictionary[str, BaseFileLock]] = WeakValueDictionary()
+
+    def __new__(  # noqa: PLR0913
+        cls,
+        lock_file: str | os.PathLike[str],
+        timeout: float = -1,  # noqa: ARG003
+        mode: int = 0o644,  # noqa: ARG003
+        thread_local: bool = True,  # noqa: ARG003, FBT001, FBT002
+        *,
+        is_singleton: bool = False,
+        **kwargs: dict[str, Any],  # capture remaining kwargs for subclasses  # noqa: ARG003
+    ) -> Self:
+        """Create a new lock object or if specified return the singleton instance for the lock file."""
+        if not is_singleton:
+            return super().__new__(cls)
+
+        instance = cls._instances.get(str(lock_file))
+        if not instance:
+            instance = super().__new__(cls)
+            cls._instances[str(lock_file)] = instance
+
+        return instance  # type: ignore[return-value] # https://github.com/python/mypy/issues/15322
+
+    def __init__(  # noqa: PLR0913
+        self,
+        lock_file: str | os.PathLike[str],
+        timeout: float = -1,
+        mode: int = 0o644,
+        thread_local: bool = True,  # noqa: FBT001, FBT002
+        *,
+        is_singleton: bool = False,
+    ) -> None:
+        """
+        Create a new lock object.
+
+        :param lock_file: path to the file
+        :param timeout: default timeout when acquiring the lock, in seconds. It will be used as fallback value in \
+            the acquire method, if no timeout value (``None``) is given. If you want to disable the timeout, set it \
+            to a negative value. A timeout of 0 means, that there is exactly one attempt to acquire the file lock.
+        :param mode: file permissions for the lockfile
+        :param thread_local: Whether this object's internal context should be thread local or not. If this is set to \
+            ``False`` then the lock will be reentrant across threads.
+        :param is_singleton: If this is set to ``True`` then only one instance of this class will be created \
+            per lock file. This is useful if you want to use the lock object for reentrant locking without needing \
+            to pass the same object around.
+        """
+        self._is_thread_local = thread_local
+        self._is_singleton = is_singleton
+
+        # Create the context. Note that external code should not work with the context directly and should instead use
+        # properties of this class.
+        kwargs: dict[str, Any] = {
+            "lock_file": os.fspath(lock_file),
+            "timeout": timeout,
+            "mode": mode,
+        }
+        self._context: FileLockContext = (ThreadLocalFileContext if thread_local else FileLockContext)(**kwargs)
+
+    def is_thread_local(self) -> bool:
+        """:return: a flag indicating if this lock is thread local or not"""
+        return self._is_thread_local
+
+    @property
+    def is_singleton(self) -> bool:
+        """:return: a flag indicating if this lock is singleton or not"""
+        return self._is_singleton
+
+    @property
+    def lock_file(self) -> str:
+        """:return: path to the lock file"""
+        return self._context.lock_file
+
+    @property
+    def timeout(self) -> float:
+        """
+        :return: the default timeout value, in seconds
+
+        .. versionadded:: 2.0.0
+        """
+        return self._context.timeout
+
+    @timeout.setter
+    def timeout(self, value: float | str) -> None:
+        """
+        Change the default timeout value.
+
+        :param value: the new value, in seconds
+        """
+        self._context.timeout = float(value)
+
+    @abstractmethod
+    def _acquire(self) -> None:
+        """If the file lock could be acquired, self._context.lock_file_fd holds the file descriptor of the lock file."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def _release(self) -> None:
+        """Releases the lock and sets self._context.lock_file_fd to None."""
+        raise NotImplementedError
+
+    @property
+    def is_locked(self) -> bool:
+        """
+
+        :return: A boolean indicating if the lock file is holding the lock currently.
+
+        .. versionchanged:: 2.0.0
+
+            This was previously a method and is now a property.
+        """
+        return self._context.lock_file_fd is not None
+
+    @property
+    def lock_counter(self) -> int:
+        """:return: The number of times this lock has been acquired (but not yet released)."""
+        return self._context.lock_counter
+
+    def acquire(
+        self,
+        timeout: float | None = None,
+        poll_interval: float = 0.05,
+        *,
+        poll_intervall: float | None = None,
+        blocking: bool = True,
+    ) -> AcquireReturnProxy:
+        """
+        Try to acquire the file lock.
+
+        :param timeout: maximum wait time for acquiring the lock, ``None`` means use the default :attr:`~timeout` is and
+            if ``timeout < 0``, there is no timeout and this method will block until the lock could be acquired
+        :param poll_interval: interval of trying to acquire the lock file
+        :param poll_intervall: deprecated, kept for backwards compatibility, use ``poll_interval`` instead
+        :param blocking: defaults to True. If False, function will return immediately if it cannot obtain a lock on the
+            first attempt. Otherwise, this method will block until the timeout expires or the lock is acquired.
+        :raises Timeout: if fails to acquire lock within the timeout period
+        :return: a context object that will unlock the file when the context is exited
+
+        .. code-block:: python
+
+            # You can use this method in the context manager (recommended)
+            with lock.acquire():
+                pass
+
+            # Or use an equivalent try-finally construct:
+            lock.acquire()
+            try:
+                pass
+            finally:
+                lock.release()
+
+        .. versionchanged:: 2.0.0
+
+            This method returns now a *proxy* object instead of *self*,
+            so that it can be used in a with statement without side effects.
+
+        """
+        # Use the default timeout, if no timeout is provided.
+        if timeout is None:
+            timeout = self._context.timeout
+
+        if poll_intervall is not None:
+            msg = "use poll_interval instead of poll_intervall"
+            warnings.warn(msg, DeprecationWarning, stacklevel=2)
+            poll_interval = poll_intervall
+
+        # Increment the number right at the beginning. We can still undo it, if something fails.
+        self._context.lock_counter += 1
+
+        lock_id = id(self)
+        lock_filename = self.lock_file
+        start_time = time.perf_counter()
+        try:
+            while True:
+                if not self.is_locked:
+                    _LOGGER.debug("Attempting to acquire lock %s on %s", lock_id, lock_filename)
+                    self._acquire()
+                if self.is_locked:
+                    _LOGGER.debug("Lock %s acquired on %s", lock_id, lock_filename)
+                    break
+                if blocking is False:
+                    _LOGGER.debug("Failed to immediately acquire lock %s on %s", lock_id, lock_filename)
+                    raise Timeout(lock_filename)  # noqa: TRY301
+                if 0 <= timeout < time.perf_counter() - start_time:
+                    _LOGGER.debug("Timeout on acquiring lock %s on %s", lock_id, lock_filename)
+                    raise Timeout(lock_filename)  # noqa: TRY301
+                msg = "Lock %s not acquired on %s, waiting %s seconds ..."
+                _LOGGER.debug(msg, lock_id, lock_filename, poll_interval)
+                time.sleep(poll_interval)
+        except BaseException:  # Something did go wrong, so decrement the counter.
+            self._context.lock_counter = max(0, self._context.lock_counter - 1)
+            raise
+        return AcquireReturnProxy(lock=self)
+
+    def release(self, force: bool = False) -> None:  # noqa: FBT001, FBT002
+        """
+        Releases the file lock. Please note, that the lock is only completely released, if the lock counter is 0. Also
+        note, that the lock file itself is not automatically deleted.
+
+        :param force: If true, the lock counter is ignored and the lock is released in every case/
+        """
+        if self.is_locked:
+            self._context.lock_counter -= 1
+
+            if self._context.lock_counter == 0 or force:
+                lock_id, lock_filename = id(self), self.lock_file
+
+                _LOGGER.debug("Attempting to release lock %s on %s", lock_id, lock_filename)
+                self._release()
+                self._context.lock_counter = 0
+                _LOGGER.debug("Lock %s released on %s", lock_id, lock_filename)
+
+    def __enter__(self) -> Self:
+        """
+        Acquire the lock.
+
+        :return: the lock object
+        """
+        self.acquire()
+        return self
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_value: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> None:
+        """
+        Release the lock.
+
+        :param exc_type: the exception type if raised
+        :param exc_value: the exception value if raised
+        :param traceback: the exception traceback if raised
+        """
+        self.release()
+
+    def __del__(self) -> None:
+        """Called when the lock object is deleted."""
+        self.release(force=True)
+
+
+__all__ = [
+    "BaseFileLock",
+    "AcquireReturnProxy",
+]
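BaseFileLock is the machinery behind the package's public classes; the RECORD above also lists a filelock/__init__.py (not shown in this diff) that conventionally re-exports a platform-appropriate FileLock. A minimal usage sketch, assuming that public alias and an illustrative lock path:

from filelock import FileLock, Timeout

lock = FileLock("/tmp/example.db.lock", timeout=5)  # illustrative path

try:
    with lock:  # __enter__/__exit__ from BaseFileLock acquire and release
        pass  # critical section; reentrant within the same thread
except Timeout:
    print("could not acquire the lock within 5 seconds")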
lib/python3.11/site-packages/filelock/_error.py
ADDED
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+class Timeout(TimeoutError):  # noqa: N818
+    """Raised when the lock could not be acquired in *timeout* seconds."""
+
+    def __init__(self, lock_file: str) -> None:
+        super().__init__()
+        self._lock_file = lock_file
+
+    def __reduce__(self) -> str | tuple[Any, ...]:
+        return self.__class__, (self._lock_file,)  # Properly pickle the exception
+
+    def __str__(self) -> str:
+        return f"The file lock '{self._lock_file}' could not be acquired."
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({self.lock_file!r})"
+
+    @property
+    def lock_file(self) -> str:
+        """:return: The path of the file lock."""
+        return self._lock_file
+
+
+__all__ = [
+    "Timeout",
+]
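The __reduce__ above is what lets a Timeout cross pickling boundaries (for example, raised inside a multiprocessing worker) with its lock path intact; a small self-contained check, with an illustrative lock path:

import pickle

from filelock import Timeout

err = Timeout("/tmp/app.lock")  # illustrative lock path
restored = pickle.loads(pickle.dumps(err))  # round-trips via __reduce__
assert restored.lock_file == err.lock_file
print(repr(restored))  # Timeout('/tmp/app.lock')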
lib/python3.11/site-packages/filelock/_soft.py
ADDED
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import os
+import sys
+from contextlib import suppress
+from errno import EACCES, EEXIST
+from pathlib import Path
+
+from ._api import BaseFileLock
+from ._util import ensure_directory_exists, raise_on_not_writable_file
+
+
+class SoftFileLock(BaseFileLock):
+    """Simply watches the existence of the lock file."""
+
+    def _acquire(self) -> None:
+        raise_on_not_writable_file(self.lock_file)
+        ensure_directory_exists(self.lock_file)
+        # first check for exists and read-only mode as the open will mask this case as EEXIST
+        flags = (
+            os.O_WRONLY  # open for writing only
+            | os.O_CREAT
+            | os.O_EXCL  # together with above raise EEXIST if the file specified by filename exists
+            | os.O_TRUNC  # truncate the file to zero byte
+        )
+        try:
+            file_handler = os.open(self.lock_file, flags, self._context.mode)
+        except OSError as exception:  # re-raise unless expected exception
+            if not (
+                exception.errno == EEXIST  # lock already exist
+                or (exception.errno == EACCES and sys.platform == "win32")  # has no access to this lock
+            ):  # pragma: win32 no cover
+                raise
+        else:
+            self._context.lock_file_fd = file_handler
+
+    def _release(self) -> None:
+        assert self._context.lock_file_fd is not None  # noqa: S101
+        os.close(self._context.lock_file_fd)  # the lock file is definitely not None
+        self._context.lock_file_fd = None
+        with suppress(OSError):  # the file is already deleted and that's what we want
+            Path(self.lock_file).unlink()
+
+
+__all__ = [
+    "SoftFileLock",
+]
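Because SoftFileLock relies only on O_CREAT | O_EXCL semantics rather than OS locking calls, it is the usual fallback where flock is unreliable (such as some network mounts), at the cost of a stale lock file if the holder crashes. A minimal sketch with an illustrative path:

from filelock import SoftFileLock

# Existence of the file *is* the lock; no fcntl/msvcrt involved.
with SoftFileLock("/mnt/shared/task.lock", timeout=10):  # illustrative path
    pass  # exclusive section; the lock file is unlinked on release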
lib/python3.11/site-packages/filelock/_unix.py
ADDED
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import os
+import sys
+from contextlib import suppress
+from errno import ENOSYS
+from typing import cast
+
+from ._api import BaseFileLock
+from ._util import ensure_directory_exists
+
+#: a flag to indicate if the fcntl API is available
+has_fcntl = False
+if sys.platform == "win32":  # pragma: win32 cover
+
+    class UnixFileLock(BaseFileLock):
+        """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
+
+        def _acquire(self) -> None:
+            raise NotImplementedError
+
+        def _release(self) -> None:
+            raise NotImplementedError
+
+else:  # pragma: win32 no cover
+    try:
+        import fcntl
+    except ImportError:
+        pass
+    else:
+        has_fcntl = True
+
+    class UnixFileLock(BaseFileLock):
+        """Uses the :func:`fcntl.flock` to hard lock the lock file on unix systems."""
+
+        def _acquire(self) -> None:
+            ensure_directory_exists(self.lock_file)
+            open_flags = os.O_RDWR | os.O_CREAT | os.O_TRUNC
+            fd = os.open(self.lock_file, open_flags, self._context.mode)
+            with suppress(PermissionError):  # This locked is not owned by this UID
+                os.fchmod(fd, self._context.mode)
+            try:
+                fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+            except OSError as exception:
+                os.close(fd)
+                if exception.errno == ENOSYS:  # NotImplemented error
+                    msg = "FileSystem does not appear to support flock; user SoftFileLock instead"
+                    raise NotImplementedError(msg) from exception
+            else:
+                self._context.lock_file_fd = fd
+
+        def _release(self) -> None:
+            # Do not remove the lockfile:
+            # https://github.com/tox-dev/py-filelock/issues/31
+            # https://stackoverflow.com/questions/17708885/flock-removing-locked-file-without-race-condition
+            fd = cast(int, self._context.lock_file_fd)
+            self._context.lock_file_fd = None
+            fcntl.flock(fd, fcntl.LOCK_UN)
+            os.close(fd)
+
+
+__all__ = [
+    "has_fcntl",
+    "UnixFileLock",
+]
lib/python3.11/site-packages/filelock/_util.py
ADDED
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import os
+import stat
+import sys
+from errno import EACCES, EISDIR
+from pathlib import Path
+
+
+def raise_on_not_writable_file(filename: str) -> None:
+    """
+    Raise an exception if attempting to open the file for writing would fail.
+    This is done so files that will never be writable can be separated from
+    files that are writable but currently locked
+    :param filename: file to check
+    :raises OSError: as if the file was opened for writing.
+    """
+    try:  # use stat to do exists + can write to check without race condition
+        file_stat = os.stat(filename)  # noqa: PTH116
+    except OSError:
+        return  # swallow does not exist or other errors
+
+    if file_stat.st_mtime != 0:  # if os.stat returns but modification is zero that's an invalid os.stat - ignore it
+        if not (file_stat.st_mode & stat.S_IWUSR):
+            raise PermissionError(EACCES, "Permission denied", filename)
+
+        if stat.S_ISDIR(file_stat.st_mode):
+            if sys.platform == "win32":  # pragma: win32 cover
+                # On Windows, this is PermissionError
+                raise PermissionError(EACCES, "Permission denied", filename)
+            else:  # pragma: win32 no cover # noqa: RET506
+                # On linux / macOS, this is IsADirectoryError
+                raise IsADirectoryError(EISDIR, "Is a directory", filename)
+
+
+def ensure_directory_exists(filename: Path | str) -> None:
+    """
+    Ensure the directory containing the file exists (create it if necessary)
+    :param filename: file.
+    """
+    Path(filename).parent.mkdir(parents=True, exist_ok=True)
+
+
+__all__ = [
+    "raise_on_not_writable_file",
+    "ensure_directory_exists",
+]
lib/python3.11/site-packages/filelock/_windows.py
ADDED
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import os
+import sys
+from contextlib import suppress
+from errno import EACCES
+from pathlib import Path
+from typing import cast
+
+from ._api import BaseFileLock
+from ._util import ensure_directory_exists, raise_on_not_writable_file
+
+if sys.platform == "win32":  # pragma: win32 cover
+    import msvcrt
+
+    class WindowsFileLock(BaseFileLock):
+        """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems."""
+
+        def _acquire(self) -> None:
+            raise_on_not_writable_file(self.lock_file)
+            ensure_directory_exists(self.lock_file)
+            flags = (
+                os.O_RDWR  # open for read and write
+                | os.O_CREAT  # create file if not exists
+                | os.O_TRUNC  # truncate file if not empty
+            )
+            try:
+                fd = os.open(self.lock_file, flags, self._context.mode)
+            except OSError as exception:
+                if exception.errno != EACCES:  # has no access to this lock
+                    raise
+            else:
+                try:
+                    msvcrt.locking(fd, msvcrt.LK_NBLCK, 1)
+                except OSError as exception:
+                    os.close(fd)  # close file first
+                    if exception.errno != EACCES:  # file is already locked
+                        raise
+                else:
+                    self._context.lock_file_fd = fd
+
+        def _release(self) -> None:
+            fd = cast(int, self._context.lock_file_fd)
+            self._context.lock_file_fd = None
+            msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
+            os.close(fd)
+
+            with suppress(OSError):  # Probably another instance of the application hat acquired the file lock.
+                Path(self.lock_file).unlink()
+
+else:  # pragma: win32 no cover
+
+    class WindowsFileLock(BaseFileLock):
+        """Uses the :func:`msvcrt.locking` function to hard lock the lock file on Windows systems."""
+
+        def _acquire(self) -> None:
+            raise NotImplementedError
+
+        def _release(self) -> None:
+            raise NotImplementedError
+
+
+__all__ = [
+    "WindowsFileLock",
+]
lib/python3.11/site-packages/filelock/py.typed
ADDED
File without changes
lib/python3.11/site-packages/filelock/version.py
ADDED
@@ -0,0 +1,16 @@
+# file generated by setuptools_scm
+# don't change, don't track in version control
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple, Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+
+__version__ = version = '3.13.1'
+__version_tuple__ = version_tuple = (3, 13, 1)
lib/python3.11/site-packages/fsspec/__init__.py
ADDED
@@ -0,0 +1,69 @@
+from importlib.metadata import entry_points
+
+from . import _version, caching
+from .callbacks import Callback
+from .compression import available_compressions
+from .core import get_fs_token_paths, open, open_files, open_local
+from .exceptions import FSTimeoutError
+from .mapping import FSMap, get_mapper
+from .registry import (
+    available_protocols,
+    filesystem,
+    get_filesystem_class,
+    register_implementation,
+    registry,
+)
+from .spec import AbstractFileSystem
+
+__version__ = _version.get_versions()["version"]
+
+__all__ = [
+    "AbstractFileSystem",
+    "FSTimeoutError",
+    "FSMap",
+    "filesystem",
+    "register_implementation",
+    "get_filesystem_class",
+    "get_fs_token_paths",
+    "get_mapper",
+    "open",
+    "open_files",
+    "open_local",
+    "registry",
+    "caching",
+    "Callback",
+    "available_protocols",
+    "available_compressions",
+]
+
+
+def process_entries():
+    if entry_points is not None:
+        try:
+            eps = entry_points()
+        except TypeError:
+            pass  # importlib-metadata < 0.8
+        else:
+            if hasattr(eps, "select"):  # Python 3.10+ / importlib_metadata >= 3.9.0
+                specs = eps.select(group="fsspec.specs")
+            else:
+                specs = eps.get("fsspec.specs", [])
+            registered_names = {}
+            for spec in specs:
+                err_msg = f"Unable to load filesystem from {spec}"
+                name = spec.name
+                if name in registered_names:
+                    continue
+                registered_names[name] = True
+                register_implementation(
+                    name,
+                    spec.value.replace(":", "."),
+                    errtxt=err_msg,
+                    # We take our implementations as the ones to overload with if
+                    # for some reason we encounter some, may be the same, already
+                    # registered
+                    clobber=True,
+                )
+
+
+process_entries()
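The names exported above are the usual entry points: fsspec.filesystem gives a protocol-level handle resolved through the registry that process_entries() populates, and fsspec.open is the unified single-file front end. A minimal sketch against the built-in local filesystem (paths are illustrative):

import fsspec

fs = fsspec.filesystem("file")  # resolved through the registry
print(fs.ls("/tmp"))  # illustrative directory

with fsspec.open("file:///tmp/example.txt", "wt") as f:  # illustrative path
    f.write("hello")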
lib/python3.11/site-packages/fsspec/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (2.26 kB)

lib/python3.11/site-packages/fsspec/__pycache__/_version.cpython-311.pyc
ADDED
Binary file (630 Bytes)

lib/python3.11/site-packages/fsspec/__pycache__/archive.cpython-311.pyc
ADDED
Binary file (4.81 kB)

lib/python3.11/site-packages/fsspec/__pycache__/asyn.cpython-311.pyc
ADDED
Binary file (51.9 kB)

lib/python3.11/site-packages/fsspec/__pycache__/caching.cpython-311.pyc
ADDED
Binary file (37.6 kB)

lib/python3.11/site-packages/fsspec/__pycache__/callbacks.cpython-311.pyc
ADDED
Binary file (10 kB)

lib/python3.11/site-packages/fsspec/__pycache__/compression.cpython-311.pyc
ADDED
Binary file (8.07 kB)

lib/python3.11/site-packages/fsspec/__pycache__/config.cpython-311.pyc
ADDED
Binary file (6.71 kB)

lib/python3.11/site-packages/fsspec/__pycache__/conftest.cpython-311.pyc
ADDED
Binary file (3.44 kB)

lib/python3.11/site-packages/fsspec/__pycache__/core.cpython-311.pyc
ADDED
Binary file (30.7 kB)

lib/python3.11/site-packages/fsspec/__pycache__/dircache.cpython-311.pyc
ADDED
Binary file (4.77 kB)

lib/python3.11/site-packages/fsspec/__pycache__/exceptions.cpython-311.pyc
ADDED
Binary file (991 Bytes)

lib/python3.11/site-packages/fsspec/__pycache__/fuse.cpython-311.pyc
ADDED
Binary file (17.1 kB)

lib/python3.11/site-packages/fsspec/__pycache__/generic.cpython-311.pyc
ADDED
Binary file (21.7 kB)

lib/python3.11/site-packages/fsspec/__pycache__/gui.cpython-311.pyc
ADDED
Binary file (23.3 kB)

lib/python3.11/site-packages/fsspec/__pycache__/mapping.cpython-311.pyc
ADDED
Binary file (13.6 kB)

lib/python3.11/site-packages/fsspec/__pycache__/parquet.cpython-311.pyc
ADDED
Binary file (17.8 kB)

lib/python3.11/site-packages/fsspec/__pycache__/registry.cpython-311.pyc
ADDED
Binary file (11.3 kB)

lib/python3.11/site-packages/fsspec/__pycache__/spec.cpython-311.pyc
ADDED
Binary file (88.5 kB)

lib/python3.11/site-packages/fsspec/__pycache__/transaction.cpython-311.pyc
ADDED
Binary file (5.01 kB)

lib/python3.11/site-packages/fsspec/__pycache__/utils.cpython-311.pyc
ADDED
Binary file (32.1 kB)
lib/python3.11/site-packages/fsspec/_version.py
ADDED
@@ -0,0 +1,21 @@
+
+# This file was generated by 'versioneer.py' (0.29) from
+# revision-control system data, or from the parent directory name of an
+# unpacked source archive. Distribution tarballs contain a pre-generated copy
+# of this file.
+
+import json
+
+version_json = '''
+{
+ "date": "2023-12-11T16:18:48-0500",
+ "dirty": false,
+ "error": null,
+ "full-revisionid": "dd8cb9bf620be4d9153e854dd1431c23a2be6db0",
+ "version": "2023.12.2"
+}
+'''  # END VERSION_JSON
+
+
+def get_versions():
+    return json.loads(version_json)
lib/python3.11/site-packages/fsspec/archive.py
ADDED
@@ -0,0 +1,73 @@
+from fsspec import AbstractFileSystem
+from fsspec.utils import tokenize
+
+
+class AbstractArchiveFileSystem(AbstractFileSystem):
+    """
+    A generic superclass for implementing Archive-based filesystems.
+
+    Currently, it is shared amongst
+    :class:`~fsspec.implementations.zip.ZipFileSystem`,
+    :class:`~fsspec.implementations.libarchive.LibArchiveFileSystem` and
+    :class:`~fsspec.implementations.tar.TarFileSystem`.
+    """
+
+    def __str__(self):
+        return f"<Archive-like object {type(self).__name__} at {id(self)}>"
+
+    __repr__ = __str__
+
+    def ukey(self, path):
+        return tokenize(path, self.fo, self.protocol)
+
+    def _all_dirnames(self, paths):
+        """Returns *all* directory names for each path in paths, including intermediate
+        ones.
+
+        Parameters
+        ----------
+        paths: Iterable of path strings
+        """
+        if len(paths) == 0:
+            return set()
+
+        dirnames = {self._parent(path) for path in paths} - {self.root_marker}
+        return dirnames | self._all_dirnames(dirnames)
+
+    def info(self, path, **kwargs):
+        self._get_dirs()
+        path = self._strip_protocol(path)
+        if path in {"", "/"} and self.dir_cache:
+            return {"name": "", "type": "directory", "size": 0}
+        if path in self.dir_cache:
+            return self.dir_cache[path]
+        elif path + "/" in self.dir_cache:
+            return self.dir_cache[path + "/"]
+        else:
+            raise FileNotFoundError(path)
+
+    def ls(self, path, detail=True, **kwargs):
+        self._get_dirs()
+        paths = {}
+        for p, f in self.dir_cache.items():
+            p = p.rstrip("/")
+            if "/" in p:
+                root = p.rsplit("/", 1)[0]
+            else:
+                root = ""
+            if root == path.rstrip("/"):
+                paths[p] = f
+            elif all(
+                (a == b)
+                for a, b in zip(path.split("/"), [""] + p.strip("/").split("/"))
+            ):
+                # root directory entry
+                ppath = p.rstrip("/").split("/", 1)[0]
+                if ppath not in paths:
+                    out = {"name": ppath, "size": 0, "type": "directory"}
+                    paths[ppath] = out
+        if detail:
+            out = sorted(paths.values(), key=lambda _: _["name"])
+            return out
+        else:
+            return sorted(paths)
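AbstractArchiveFileSystem itself only supplies listing logic over a dir_cache that subclasses fill in via _get_dirs(); the concrete classes named in its docstring are what you instantiate. A minimal sketch with ZipFileSystem over an in-memory archive, so the example is self-contained:

import io
import zipfile

from fsspec.implementations.zip import ZipFileSystem

# Build a small zip in memory so the example needs no files on disk.
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as z:
    z.writestr("data/a.txt", "hello")

fs = ZipFileSystem(buf)           # accepts a path or an open file-like object
print(fs.ls("data"))              # served from the populated dir_cache
print(fs.cat_file("data/a.txt"))  # b'hello'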
lib/python3.11/site-packages/fsspec/asyn.py
ADDED
@@ -0,0 +1,1081 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import asyncio.events
|
3 |
+
import functools
|
4 |
+
import inspect
|
5 |
+
import io
|
6 |
+
import numbers
|
7 |
+
import os
|
8 |
+
import re
|
9 |
+
import threading
|
10 |
+
from contextlib import contextmanager
|
11 |
+
from glob import has_magic
|
12 |
+
from typing import TYPE_CHECKING, Iterable
|
13 |
+
|
14 |
+
from .callbacks import _DEFAULT_CALLBACK
|
15 |
+
from .exceptions import FSTimeoutError
|
16 |
+
from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep
|
17 |
+
from .spec import AbstractBufferedFile, AbstractFileSystem
|
18 |
+
from .utils import glob_translate, is_exception, other_paths
|
19 |
+
|
20 |
+
private = re.compile("_[^_]")
|
21 |
+
iothread = [None] # dedicated fsspec IO thread
|
22 |
+
loop = [None] # global event loop for any non-async instance
|
23 |
+
_lock = None # global lock placeholder
|
24 |
+
get_running_loop = asyncio.get_running_loop
|
25 |
+
|
26 |
+
|
27 |
+
def get_lock():
|
28 |
+
"""Allocate or return a threading lock.
|
29 |
+
|
30 |
+
The lock is allocated on first use to allow setting one lock per forked process.
|
31 |
+
"""
|
32 |
+
global _lock
|
33 |
+
if not _lock:
|
34 |
+
_lock = threading.Lock()
|
35 |
+
return _lock
|
36 |
+
|
37 |
+
|
38 |
+
def reset_lock():
|
39 |
+
"""Reset the global lock.
|
40 |
+
|
41 |
+
This should be called only on the init of a forked process to reset the lock to
|
42 |
+
None, enabling the new forked process to get a new lock.
|
43 |
+
"""
|
44 |
+
global _lock
|
45 |
+
|
46 |
+
iothread[0] = None
|
47 |
+
loop[0] = None
|
48 |
+
_lock = None
|
49 |
+
|
50 |
+
|
51 |
+
async def _runner(event, coro, result, timeout=None):
|
52 |
+
timeout = timeout if timeout else None # convert 0 or 0.0 to None
|
53 |
+
if timeout is not None:
|
54 |
+
coro = asyncio.wait_for(coro, timeout=timeout)
|
55 |
+
try:
|
56 |
+
result[0] = await coro
|
57 |
+
except Exception as ex:
|
58 |
+
result[0] = ex
|
59 |
+
finally:
|
60 |
+
event.set()
|
61 |
+
|
62 |
+
|
63 |
+
def sync(loop, func, *args, timeout=None, **kwargs):
|
64 |
+
"""
|
65 |
+
Make loop run coroutine until it returns. Runs in other thread
|
66 |
+
|
67 |
+
Examples
|
68 |
+
--------
|
69 |
+
>>> fsspec.asyn.sync(fsspec.asyn.get_loop(), func, *args,
|
70 |
+
timeout=timeout, **kwargs)
|
71 |
+
"""
|
72 |
+
timeout = timeout if timeout else None # convert 0 or 0.0 to None
|
73 |
+
# NB: if the loop is not running *yet*, it is OK to submit work
|
74 |
+
# and we will wait for it
|
75 |
+
if loop is None or loop.is_closed():
|
76 |
+
raise RuntimeError("Loop is not running")
|
77 |
+
try:
|
78 |
+
loop0 = asyncio.events.get_running_loop()
|
79 |
+
if loop0 is loop:
|
80 |
+
raise NotImplementedError("Calling sync() from within a running loop")
|
81 |
+
except NotImplementedError:
|
82 |
+
raise
|
83 |
+
except RuntimeError:
|
84 |
+
pass
|
85 |
+
coro = func(*args, **kwargs)
|
86 |
+
result = [None]
|
87 |
+
event = threading.Event()
|
88 |
+
asyncio.run_coroutine_threadsafe(_runner(event, coro, result, timeout), loop)
|
89 |
+
while True:
|
90 |
+
# this loops allows thread to get interrupted
|
91 |
+
if event.wait(1):
|
92 |
+
break
|
93 |
+
if timeout is not None:
|
94 |
+
timeout -= 1
|
95 |
+
if timeout < 0:
|
96 |
+
raise FSTimeoutError
|
97 |
+
|
98 |
+
return_result = result[0]
|
99 |
+
if isinstance(return_result, asyncio.TimeoutError):
|
100 |
+
# suppress asyncio.TimeoutError, raise FSTimeoutError
|
101 |
+
raise FSTimeoutError from return_result
|
102 |
+
elif isinstance(return_result, BaseException):
|
103 |
+
raise return_result
|
104 |
+
else:
|
105 |
+
return return_result
|
106 |
+
|
107 |
+
|
108 |
+
def sync_wrapper(func, obj=None):
|
109 |
+
"""Given a function, make so can be called in blocking contexts
|
110 |
+
|
111 |
+
Leave obj=None if defining within a class. Pass the instance if attaching
|
112 |
+
as an attribute of the instance.
|
113 |
+
"""
|
114 |
+
|
115 |
+
@functools.wraps(func)
|
116 |
+
def wrapper(*args, **kwargs):
|
117 |
+
self = obj or args[0]
|
118 |
+
return sync(self.loop, func, *args, **kwargs)
|
119 |
+
|
120 |
+
return wrapper
|
121 |
+
|
122 |
+
|
123 |
+
@contextmanager
|
124 |
+
def _selector_policy():
|
125 |
+
original_policy = asyncio.get_event_loop_policy()
|
126 |
+
try:
|
127 |
+
if os.name == "nt" and hasattr(asyncio, "WindowsSelectorEventLoopPolicy"):
|
128 |
+
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
|
129 |
+
|
130 |
+
yield
|
131 |
+
finally:
|
132 |
+
asyncio.set_event_loop_policy(original_policy)
|
133 |
+
|
134 |
+
|
135 |
+
def get_loop():
|
136 |
+
"""Create or return the default fsspec IO loop
|
137 |
+
|
138 |
+
The loop will be running on a separate thread.
|
139 |
+
"""
|
140 |
+
if loop[0] is None:
|
141 |
+
with get_lock():
|
142 |
+
# repeat the check just in case the loop got filled between the
|
143 |
+
# previous two calls from another thread
|
144 |
+
if loop[0] is None:
|
145 |
+
with _selector_policy():
|
146 |
+
loop[0] = asyncio.new_event_loop()
|
147 |
+
th = threading.Thread(target=loop[0].run_forever, name="fsspecIO")
|
148 |
+
th.daemon = True
|
149 |
+
th.start()
|
150 |
+
iothread[0] = th
|
151 |
+
return loop[0]
|
152 |
+
|
153 |
+
|
154 |
+
if TYPE_CHECKING:
|
155 |
+
import resource
|
156 |
+
|
157 |
+
ResourceError = resource.error
|
158 |
+
else:
|
159 |
+
try:
|
160 |
+
import resource
|
161 |
+
except ImportError:
|
162 |
+
resource = None
|
163 |
+
ResourceError = OSError
|
164 |
+
else:
|
165 |
+
ResourceError = getattr(resource, "error", OSError)
|
166 |
+
|
167 |
+
_DEFAULT_BATCH_SIZE = 128
|
168 |
+
_NOFILES_DEFAULT_BATCH_SIZE = 1280
|
169 |
+
|
170 |
+
|
171 |
+
def _get_batch_size(nofiles=False):
|
172 |
+
from fsspec.config import conf
|
173 |
+
|
174 |
+
if nofiles:
|
175 |
+
if "nofiles_gather_batch_size" in conf:
|
176 |
+
return conf["nofiles_gather_batch_size"]
|
177 |
+
else:
|
178 |
+
if "gather_batch_size" in conf:
|
179 |
+
return conf["gather_batch_size"]
|
180 |
+
if nofiles:
|
181 |
+
return _NOFILES_DEFAULT_BATCH_SIZE
|
182 |
+
if resource is None:
|
183 |
+
return _DEFAULT_BATCH_SIZE
|
184 |
+
|
185 |
+
try:
|
186 |
+
soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
|
187 |
+
except (ImportError, ValueError, ResourceError):
|
188 |
+
return _DEFAULT_BATCH_SIZE
|
189 |
+
|
190 |
+
if soft_limit == resource.RLIM_INFINITY:
|
191 |
+
return -1
|
192 |
+
else:
|
193 |
+
return soft_limit // 8
|
194 |
+
|
195 |
+
|
196 |
+
def running_async() -> bool:
    """Being executed by an event loop?"""
    try:
        asyncio.get_running_loop()
        return True
    except RuntimeError:
        return False


async def _run_coros_in_chunks(
    coros,
    batch_size=None,
    callback=_DEFAULT_CALLBACK,
    timeout=None,
    return_exceptions=False,
    nofiles=False,
):
    """Run the given coroutines in chunks.

    Parameters
    ----------
    coros: list of coroutines to run
    batch_size: int or None
        Number of coroutines to submit/wait on simultaneously.
        If -1, there will not be any throttling. If
        None, it will be inferred from _get_batch_size()
    callback: fsspec.callbacks.Callback instance
        Gets a relative_update when each coroutine completes
    timeout: number or None
        If given, each coroutine times out after this time. Note that, since
        there are multiple batches, the total run time of this function will in
        general be longer
    return_exceptions: bool
        Same meaning as in asyncio.gather
    nofiles: bool
        If inferring the batch_size, does this operation involve local files?
        If yes, you normally expect smaller batches.
    """

    if batch_size is None:
        batch_size = _get_batch_size(nofiles=nofiles)

    if batch_size == -1:
        batch_size = len(coros)

    assert batch_size > 0
    results = []
    for start in range(0, len(coros), batch_size):
        chunk = [
            asyncio.Task(asyncio.wait_for(c, timeout=timeout))
            for c in coros[start : start + batch_size]
        ]
        if callback is not _DEFAULT_CALLBACK:
            [
                t.add_done_callback(lambda *_, **__: callback.relative_update(1))
                for t in chunk
            ]
        results.extend(
            await asyncio.gather(*chunk, return_exceptions=return_exceptions),
        )
    return results

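
# Minimal sketch of the chunked gather above (illustrative, since
# _run_coros_in_chunks is private API; run inside an event loop):
#
#     import asyncio
#
#     async def demo():
#         coros = [asyncio.sleep(0.01, result=i) for i in range(10)]
#         # at most 4 coroutines are in flight at any moment
#         return await _run_coros_in_chunks(coros, batch_size=4)
#
#     asyncio.run(demo())  # -> [0, 1, 2, ..., 9]
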
# these methods should be implemented as async by any async-able backend
async_methods = [
    "_ls",
    "_cat_file",
    "_get_file",
    "_put_file",
    "_rm_file",
    "_cp_file",
    "_pipe_file",
    "_expand_path",
    "_info",
    "_isfile",
    "_isdir",
    "_exists",
    "_walk",
    "_glob",
    "_find",
    "_du",
    "_size",
    "_mkdir",
    "_makedirs",
]


class AsyncFileSystem(AbstractFileSystem):
    """Async file operations, default implementations

    Passes bulk operations to asyncio.gather for concurrent operation.

    Implementations that have concurrent batch operations and/or async methods
    should inherit from this class instead of AbstractFileSystem. Docstrings are
    copied from the un-underscored method in AbstractFileSystem, if not given.
    """

    # note that methods do not have docstrings here; they will be copied
    # for _* methods and inferred for overridden methods.

    async_impl = True
    mirror_sync_methods = True
    disable_throttling = False

    def __init__(self, *args, asynchronous=False, loop=None, batch_size=None, **kwargs):
        self.asynchronous = asynchronous
        self._pid = os.getpid()
        if not asynchronous:
            self._loop = loop or get_loop()
        else:
            self._loop = None
        self.batch_size = batch_size
        super().__init__(*args, **kwargs)

    @property
    def loop(self):
        if self._pid != os.getpid():
            raise RuntimeError("This class is not fork-safe")
        return self._loop

    async def _rm_file(self, path, **kwargs):
        raise NotImplementedError

    async def _rm(self, path, recursive=False, batch_size=None, **kwargs):
        # TODO: implement on_error
        batch_size = batch_size or self.batch_size
        path = await self._expand_path(path, recursive=recursive)
        return await _run_coros_in_chunks(
            [self._rm_file(p, **kwargs) for p in reversed(path)],
            batch_size=batch_size,
            nofiles=True,
        )

    async def _cp_file(self, path1, path2, **kwargs):
        raise NotImplementedError

    async def _copy(
        self,
        path1,
        path2,
        recursive=False,
        on_error=None,
        maxdepth=None,
        batch_size=None,
        **kwargs,
    ):
        if on_error is None and recursive:
            on_error = "ignore"
        elif on_error is None:
            on_error = "raise"

        if isinstance(path1, list) and isinstance(path2, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            paths1 = path1
            paths2 = path2
        else:
            source_is_str = isinstance(path1, str)
            paths1 = await self._expand_path(
                path1, maxdepth=maxdepth, recursive=recursive
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                paths1 = [
                    p for p in paths1 if not (trailing_sep(p) or await self._isdir(p))
                ]
                if not paths1:
                    return

            source_is_file = len(paths1) == 1
            dest_is_dir = isinstance(path2, str) and (
                trailing_sep(path2) or await self._isdir(path2)
            )

            exists = source_is_str and (
                (has_magic(path1) and source_is_file)
                or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1))
            )
            paths2 = other_paths(
                paths1,
                path2,
                exists=exists,
                flatten=not source_is_str,
            )

        batch_size = batch_size or self.batch_size
        coros = [self._cp_file(p1, p2, **kwargs) for p1, p2 in zip(paths1, paths2)]
        result = await _run_coros_in_chunks(
            coros, batch_size=batch_size, return_exceptions=True, nofiles=True
        )

        for ex in filter(is_exception, result):
            if on_error == "ignore" and isinstance(ex, FileNotFoundError):
                continue
            raise ex

    async def _pipe_file(self, path, value, **kwargs):
        raise NotImplementedError

    async def _pipe(self, path, value=None, batch_size=None, **kwargs):
        if isinstance(path, str):
            path = {path: value}
        batch_size = batch_size or self.batch_size
        return await _run_coros_in_chunks(
            [self._pipe_file(k, v, **kwargs) for k, v in path.items()],
            batch_size=batch_size,
            nofiles=True,
        )

    async def _process_limits(self, url, start, end):
        """Helper for "Range"-based _cat_file"""
        size = None
        suff = False
        if start is not None and start < 0:
            # if start is negative and end None, end is the "suffix length"
            if end is None:
                end = -start
                start = ""
                suff = True
            else:
                size = size or (await self._info(url))["size"]
                start = size + start
        elif start is None:
            start = 0
        if not suff:
            if end is not None and end < 0:
                if start is not None:
                    size = size or (await self._info(url))["size"]
                    end = size + end
            elif end is None:
                end = ""
            if isinstance(end, numbers.Integral):
                end -= 1  # bytes range is inclusive
        return f"bytes={start}-{end}"

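    # Worked examples for the Range strings built above (HTTP byte ranges
    # are inclusive, hence the end adjustment; values are arbitrary):
    #   start=0,    end=100  ->  "bytes=0-99"
    #   start=None, end=None ->  "bytes=0-"    (the whole file)
    #   start=-5,   end=None ->  "bytes=-5"    (suffix form: last five bytes)
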
    async def _cat_file(self, path, start=None, end=None, **kwargs):
        raise NotImplementedError

    async def _cat(
        self, path, recursive=False, on_error="raise", batch_size=None, **kwargs
    ):
        paths = await self._expand_path(path, recursive=recursive)
        coros = [self._cat_file(path, **kwargs) for path in paths]
        batch_size = batch_size or self.batch_size
        out = await _run_coros_in_chunks(
            coros, batch_size=batch_size, nofiles=True, return_exceptions=True
        )
        if on_error == "raise":
            ex = next(filter(is_exception, out), False)
            if ex:
                raise ex
        if (
            len(paths) > 1
            or isinstance(path, list)
            or paths[0] != self._strip_protocol(path)
        ):
            return {
                k: v
                for k, v in zip(paths, out)
                if on_error != "omit" or not is_exception(v)
            }
        else:
            return out[0]

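    # Sketch of the sync mirror of _cat (paths hypothetical): bytes for one
    # literal path, a dict keyed by expanded path otherwise.
    #
    #     fs.cat("s3://bucket/a.txt")                   # -> b"..."
    #     fs.cat(["s3://bucket/a", "s3://bucket/b"])    # -> {path: bytes}
    #     fs.cat("s3://bucket/*.csv", on_error="omit")  # failures dropped
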
    async def _cat_ranges(
        self,
        paths,
        starts,
        ends,
        max_gap=None,
        batch_size=None,
        on_error="return",
        **kwargs,
    ):
        """Get the contents of byte ranges from one or more files

        Parameters
        ----------
        paths: list
            A list of filepaths on this filesystem
        starts, ends: int or list
            Bytes limits of the read. If using a single int, the same value will be
            used to read all the specified files.
        """
        # TODO: on_error
        if max_gap is not None:
            # use utils.merge_offset_ranges
            raise NotImplementedError
        if not isinstance(paths, list):
            raise TypeError
        if not isinstance(starts, Iterable):
            starts = [starts] * len(paths)
        if not isinstance(ends, Iterable):
            ends = [ends] * len(paths)
        if len(starts) != len(paths) or len(ends) != len(paths):
            raise ValueError
        coros = [
            self._cat_file(p, start=s, end=e, **kwargs)
            for p, s, e in zip(paths, starts, ends)
        ]
        batch_size = batch_size or self.batch_size
        return await _run_coros_in_chunks(
            coros, batch_size=batch_size, nofiles=True, return_exceptions=True
        )

    async def _put_file(self, lpath, rpath, **kwargs):
        raise NotImplementedError

    async def _put(
        self,
        lpath,
        rpath,
        recursive=False,
        callback=_DEFAULT_CALLBACK,
        batch_size=None,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) from local.

        Copies a specific file or tree of files (if recursive=True). If rpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within.

        The put_file method will be called concurrently on a batch of files. The
        batch_size option can configure the number of futures that can be executed
        at the same time. If it is -1, then all the files will be uploaded concurrently.
        The default can be set for this instance by passing "batch_size" in the
        constructor, or for all instances by setting the "gather_batch_size" key
        in ``fsspec.config.conf``, falling back to 1/8th of the system limit.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            source_is_str = isinstance(lpath, str)
            if source_is_str:
                lpath = make_path_posix(lpath)
            fs = LocalFileSystem()
            lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth)
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))]
                if not lpaths:
                    return

            source_is_file = len(lpaths) == 1
            dest_is_dir = isinstance(rpath, str) and (
                trailing_sep(rpath) or await self._isdir(rpath)
            )

            rpath = self._strip_protocol(rpath)
            exists = source_is_str and (
                (has_magic(lpath) and source_is_file)
                or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath))
            )
            rpaths = other_paths(
                lpaths,
                rpath,
                exists=exists,
                flatten=not source_is_str,
            )

        is_dir = {l: os.path.isdir(l) for l in lpaths}
        rdirs = [r for l, r in zip(lpaths, rpaths) if is_dir[l]]
        file_pairs = [(l, r) for l, r in zip(lpaths, rpaths) if not is_dir[l]]

        await asyncio.gather(*[self._makedirs(d, exist_ok=True) for d in rdirs])
        batch_size = batch_size or self.batch_size

        coros = []
        callback.set_size(len(file_pairs))
        for lfile, rfile in file_pairs:
            callback.branch(lfile, rfile, kwargs)
            coros.append(self._put_file(lfile, rfile, **kwargs))

        return await _run_coros_in_chunks(
            coros, batch_size=batch_size, callback=callback
        )

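    # Sketch of the blocking mirror of _put with an explicit batch size
    # (s3fs is only an example AsyncFileSystem backend; paths hypothetical):
    #
    #     import fsspec
    #
    #     fs = fsspec.filesystem("s3")
    #     fs.put("local_dir/", "s3://my-bucket/dir/", recursive=True, batch_size=8)
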
    async def _get_file(self, rpath, lpath, **kwargs):
        raise NotImplementedError

    async def _get(
        self,
        rpath,
        lpath,
        recursive=False,
        callback=_DEFAULT_CALLBACK,
        maxdepth=None,
        **kwargs,
    ):
        """Copy file(s) to local.

        Copies a specific file or tree of files (if recursive=True). If lpath
        ends with a "/", it will be assumed to be a directory, and target files
        will go within. Can submit a list of paths, which may be glob-patterns
        and will be expanded.

        The get_file method will be called concurrently on a batch of files. The
        batch_size option can configure the number of futures that can be executed
        at the same time. If it is -1, then all the files will be downloaded concurrently.
        The default can be set for this instance by passing "batch_size" in the
        constructor, or for all instances by setting the "gather_batch_size" key
        in ``fsspec.config.conf``, falling back to 1/8th of the system limit.
        """
        if isinstance(lpath, list) and isinstance(rpath, list):
            # No need to expand paths when both source and destination
            # are provided as lists
            rpaths = rpath
            lpaths = lpath
        else:
            source_is_str = isinstance(rpath, str)
            # First check for rpath trailing slash as _strip_protocol removes it.
            source_not_trailing_sep = source_is_str and not trailing_sep(rpath)
            rpath = self._strip_protocol(rpath)
            rpaths = await self._expand_path(
                rpath, recursive=recursive, maxdepth=maxdepth
            )
            if source_is_str and (not recursive or maxdepth is not None):
                # Non-recursive glob does not copy directories
                rpaths = [
                    p for p in rpaths if not (trailing_sep(p) or await self._isdir(p))
                ]
                if not rpaths:
                    return

            lpath = make_path_posix(lpath)
            source_is_file = len(rpaths) == 1
            dest_is_dir = isinstance(lpath, str) and (
                trailing_sep(lpath) or LocalFileSystem().isdir(lpath)
            )

            exists = source_is_str and (
                (has_magic(rpath) and source_is_file)
                or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep)
            )
            lpaths = other_paths(
                rpaths,
                lpath,
                exists=exists,
                flatten=not source_is_str,
            )

        [os.makedirs(os.path.dirname(lp), exist_ok=True) for lp in lpaths]
        batch_size = kwargs.pop("batch_size", self.batch_size)

        coros = []
        callback.set_size(len(lpaths))
        for lpath, rpath in zip(lpaths, rpaths):
            callback.branch(rpath, lpath, kwargs)
            coros.append(self._get_file(rpath, lpath, **kwargs))
        return await _run_coros_in_chunks(
            coros, batch_size=batch_size, callback=callback
        )

    async def _isfile(self, path):
        try:
            return (await self._info(path))["type"] == "file"
        except:  # noqa: E722
            return False

    async def _isdir(self, path):
        try:
            return (await self._info(path))["type"] == "directory"
        except OSError:
            return False

    async def _size(self, path):
        return (await self._info(path)).get("size", None)

    async def _sizes(self, paths, batch_size=None):
        batch_size = batch_size or self.batch_size
        return await _run_coros_in_chunks(
            [self._size(p) for p in paths], batch_size=batch_size
        )

    async def _exists(self, path, **kwargs):
        try:
            await self._info(path, **kwargs)
            return True
        except FileNotFoundError:
            return False

    async def _info(self, path, **kwargs):
        raise NotImplementedError

    async def _ls(self, path, detail=True, **kwargs):
        raise NotImplementedError

    async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs):
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        path = self._strip_protocol(path)
        full_dirs = {}
        dirs = {}
        files = {}

        detail = kwargs.pop("detail", False)
        try:
            listing = await self._ls(path, detail=True, **kwargs)
        except (FileNotFoundError, OSError) as e:
            if on_error == "raise":
                raise
            elif callable(on_error):
                on_error(e)
            if detail:
                yield path, {}, {}
            else:
                yield path, [], []
            return

        for info in listing:
            # each info name must be at least [path]/part , but here
            # we check also for names like [path]/part/
            pathname = info["name"].rstrip("/")
            name = pathname.rsplit("/", 1)[-1]
            if info["type"] == "directory" and pathname != path:
                # do not include "self" path
                full_dirs[name] = pathname
                dirs[name] = info
            elif pathname == path:
                # file-like with same name as the given path
                files[""] = info
            else:
                files[name] = info

        if detail:
            yield path, dirs, files
        else:
            yield path, list(dirs), list(files)

        if maxdepth is not None:
            maxdepth -= 1
            if maxdepth < 1:
                return

        for d in dirs:
            async for _ in self._walk(
                full_dirs[d], maxdepth=maxdepth, detail=detail, **kwargs
            ):
                yield _

    async def _glob(self, path, maxdepth=None, **kwargs):
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        import re

        seps = (os.path.sep, os.path.altsep) if os.path.altsep else (os.path.sep,)
        ends_with_sep = path.endswith(seps)  # _strip_protocol strips trailing slash
        path = self._strip_protocol(path)
        append_slash_to_dirname = ends_with_sep or path.endswith(
            tuple(sep + "**" for sep in seps)
        )
        idx_star = path.find("*") if path.find("*") >= 0 else len(path)
        idx_qmark = path.find("?") if path.find("?") >= 0 else len(path)
        idx_brace = path.find("[") if path.find("[") >= 0 else len(path)

        min_idx = min(idx_star, idx_qmark, idx_brace)

        detail = kwargs.pop("detail", False)

        if not has_magic(path):
            if await self._exists(path, **kwargs):
                if not detail:
                    return [path]
                else:
                    return {path: await self._info(path, **kwargs)}
            else:
                if not detail:
                    return []  # glob of non-existent returns empty
                else:
                    return {}
        elif "/" in path[:min_idx]:
            min_idx = path[:min_idx].rindex("/")
            root = path[: min_idx + 1]
            depth = path[min_idx + 1 :].count("/") + 1
        else:
            root = ""
            depth = path[min_idx + 1 :].count("/") + 1

        if "**" in path:
            if maxdepth is not None:
                idx_double_stars = path.find("**")
                depth_double_stars = path[idx_double_stars:].count("/") + 1
                depth = depth - depth_double_stars + maxdepth
            else:
                depth = None

        allpaths = await self._find(
            root, maxdepth=depth, withdirs=True, detail=True, **kwargs
        )

        pattern = glob_translate(path + ("/" if ends_with_sep else ""))
        pattern = re.compile(pattern)

        out = {
            p: info
            for p, info in sorted(allpaths.items())
            if pattern.match(
                (
                    p + "/"
                    if append_slash_to_dirname and info["type"] == "directory"
                    else p
                )
            )
        }

        if detail:
            return out
        else:
            return list(out)

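    # Worked example for the root/depth computation above (path hypothetical):
    #   path = "bucket/data/*/x.parquet"
    #   the first magic character is "*"; the last "/" before it gives
    #   root = "bucket/data/" and depth = 2, so _find only walks two levels
    #   below root before the glob_translate regex is applied.
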
    async def _du(self, path, total=True, maxdepth=None, **kwargs):
        sizes = {}
        # async for?
        for f in await self._find(path, maxdepth=maxdepth, **kwargs):
            info = await self._info(f)
            sizes[info["name"]] = info["size"]
        if total:
            return sum(sizes.values())
        else:
            return sizes

    async def _find(self, path, maxdepth=None, withdirs=False, **kwargs):
        path = self._strip_protocol(path)
        out = {}
        detail = kwargs.pop("detail", False)

        # Add the root directory if withdirs is requested
        # This is needed for posix glob compliance
        if withdirs and path != "" and await self._isdir(path):
            out[path] = await self._info(path)

        # async for?
        async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs):
            if withdirs:
                files.update(dirs)
            out.update({info["name"]: info for name, info in files.items()})
        if not out and (await self._isfile(path)):
            # walk works on directories, but find should also return [path]
            # when path happens to be a file
            out[path] = {}
        names = sorted(out)
        if not detail:
            return names
        else:
            return {name: out[name] for name in names}

    async def _expand_path(self, path, recursive=False, maxdepth=None):
        if maxdepth is not None and maxdepth < 1:
            raise ValueError("maxdepth must be at least 1")

        if isinstance(path, str):
            out = await self._expand_path([path], recursive, maxdepth)
        else:
            out = set()
            path = [self._strip_protocol(p) for p in path]
            for p in path:  # can gather here
                if has_magic(p):
                    bit = set(await self._glob(p, maxdepth=maxdepth))
                    out |= bit
                    if recursive:
                        # glob call above expanded one depth so if maxdepth is defined
                        # then decrement it in expand_path call below. If it is zero
                        # after decrementing then avoid expand_path call.
                        if maxdepth is not None and maxdepth <= 1:
                            continue
                        out |= set(
                            await self._expand_path(
                                list(bit),
                                recursive=recursive,
                                maxdepth=maxdepth - 1 if maxdepth is not None else None,
                            )
                        )
                    continue
                elif recursive:
                    rec = set(await self._find(p, maxdepth=maxdepth, withdirs=True))
                    out |= rec
                if p not in out and (recursive is False or (await self._exists(p))):
                    # should only check once, for the root
                    out.add(p)
        if not out:
            raise FileNotFoundError(path)
        return sorted(out)

    async def _mkdir(self, path, create_parents=True, **kwargs):
        pass  # not necessary to implement, may not have directories

    async def _makedirs(self, path, exist_ok=False):
        pass  # not necessary to implement, may not have directories

    async def open_async(self, path, mode="rb", **kwargs):
        if "b" not in mode or kwargs.get("compression"):
            raise ValueError
        raise NotImplementedError

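
# Minimal sketch of an AsyncFileSystem subclass (entirely hypothetical; real
# backends such as s3fs implement many more of the async_methods above):
#
#     class EchoFileSystem(AsyncFileSystem):
#         protocol = "echo"
#
#         async def _info(self, path, **kwargs):
#             return {"name": path, "size": len(path), "type": "file"}
#
#         async def _cat_file(self, path, start=None, end=None, **kwargs):
#             return path.encode()[start:end]
#
#     fs = EchoFileSystem()
#     fs.cat_file("hello")  # -> b"hello"; the sync mirror is auto-generated
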
def mirror_sync_methods(obj):
    """Populate sync and async methods for obj

    For each method, will create a sync version if the name refers to an async
    method (coroutine) and there is no override in the child class; will create
    an async method for the corresponding sync method if there is no
    implementation.

    Uses the methods specified in
    - async_methods: the set that an implementation is expected to provide
    - default_async_methods: that can be derived from their sync version in
      AbstractFileSystem
    - AsyncFileSystem: async-specific default coroutines
    """
    from fsspec import AbstractFileSystem

    for method in async_methods + dir(AsyncFileSystem):
        if not method.startswith("_"):
            continue
        smethod = method[1:]
        if private.match(method):
            isco = inspect.iscoroutinefunction(getattr(obj, method, None))
            unsync = getattr(getattr(obj, smethod, False), "__func__", None)
            is_default = unsync is getattr(AbstractFileSystem, smethod, "")
            if isco and is_default:
                mth = sync_wrapper(getattr(obj, method), obj=obj)
                setattr(obj, smethod, mth)
                if not mth.__doc__:
                    mth.__doc__ = getattr(
                        getattr(AbstractFileSystem, smethod, None), "__doc__", ""
                    )

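
# In practice this is what puts the blocking API on async backends: a
# subclass defines only coroutines such as _ls, and after construction the
# instance also carries ls, cat, put, etc. Sketch (backend name hypothetical):
#
#     fs = SomeAsyncFS(asynchronous=False)
#     fs.ls("path/")         # generated wrapper, blocks via sync()
#     await fs._ls("path/")  # the underlying coroutine, for async callers
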
class FSSpecCoroutineCancel(Exception):
    pass


def _dump_running_tasks(
    printout=True, cancel=True, exc=FSSpecCoroutineCancel, with_task=False
):
    import traceback

    tasks = [t for t in asyncio.tasks.all_tasks(loop[0]) if not t.done()]
    if printout:
        [task.print_stack() for task in tasks]
    out = [
        {
            "locals": task._coro.cr_frame.f_locals,
            "file": task._coro.cr_frame.f_code.co_filename,
            "firstline": task._coro.cr_frame.f_code.co_firstlineno,
            "lineno": task._coro.cr_frame.f_lineno,
            "stack": traceback.format_stack(task._coro.cr_frame),
            "task": task if with_task else None,
        }
        for task in tasks
    ]
    if cancel:
        for t in tasks:
            cbs = t._callbacks
            t.cancel()
            asyncio.futures.Future.set_exception(t, exc)
            asyncio.futures.Future.cancel(t)
            [cb[0](t) for cb in cbs]  # cancels any dependent concurrent.futures
            try:
                t._coro.throw(exc)  # exits coro, unless explicitly handled
            except exc:
                pass
    return out

class AbstractAsyncStreamedFile(AbstractBufferedFile):
    # no read buffering, and always auto-commit
    # TODO: readahead might still be useful here, but needs async version

    async def read(self, length=-1):
        """
        Return data from cache, or fetch pieces as necessary

        Parameters
        ----------
        length: int (-1)
            Number of bytes to read; if <0, all remaining bytes.
        """
        length = -1 if length is None else int(length)
        if self.mode != "rb":
            raise ValueError("File not in read mode")
        if length < 0:
            length = self.size - self.loc
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if length == 0:
            # don't even bother calling fetch
            return b""
        out = await self._fetch_range(self.loc, self.loc + length)
        self.loc += len(out)
        return out

    async def write(self, data):
        """
        Write data to buffer.

        Buffer only sent on flush() or if buffer is greater than
        or equal to blocksize.

        Parameters
        ----------
        data: bytes
            Set of bytes to be written.
        """
        if self.mode not in {"wb", "ab"}:
            raise ValueError("File not in write mode")
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if self.forced:
            raise ValueError("This file has been force-flushed, can only close")
        out = self.buffer.write(data)
        self.loc += out
        if self.buffer.tell() >= self.blocksize:
            await self.flush()
        return out

    async def close(self):
        """Close file

        Finalizes writes, discards cache
        """
        if getattr(self, "_unclosable", False):
            return
        if self.closed:
            return
        if self.mode == "rb":
            self.cache = None
        else:
            if not self.forced:
                await self.flush(force=True)

            if self.fs is not None:
                self.fs.invalidate_cache(self.path)
                self.fs.invalidate_cache(self.fs._parent(self.path))

        self.closed = True

    async def flush(self, force=False):
        if self.closed:
            raise ValueError("Flush on closed file")
        if force and self.forced:
            raise ValueError("Force flush cannot be called more than once")
        if force:
            self.forced = True

        if self.mode not in {"wb", "ab"}:
            # no-op to flush on read-mode
            return

        if not force and self.buffer.tell() < self.blocksize:
            # Defer write on small block
            return

        if self.offset is None:
            # Initialize a multipart upload
            self.offset = 0
            try:
                await self._initiate_upload()
            except:  # noqa: E722
                self.closed = True
                raise

        if await self._upload_chunk(final=force) is not False:
            self.offset += self.buffer.seek(0, 2)
            self.buffer = io.BytesIO()

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def _fetch_range(self, start, end):
        raise NotImplementedError

    async def _initiate_upload(self):
        pass

    async def _upload_chunk(self, final=False):
        raise NotImplementedError
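
# Sketch of streamed async file usage (requires a backend that implements
# open_async, such as fsspec's HTTP filesystem; the URL and any session
# setup details vary by backend, so treat this as illustrative):
#
#     import fsspec
#
#     async def head(url, n=1024):
#         fs = fsspec.filesystem("http", asynchronous=True)
#         f = await fs.open_async(url, "rb")
#         async with f:
#             return await f.read(n)
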
lib/python3.11/site-packages/fsspec/caching.py
ADDED
@@ -0,0 +1,875 @@
from __future__ import annotations

import collections
import functools
import logging
import math
import os
import threading
import warnings
from concurrent.futures import Future, ThreadPoolExecutor
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Generic,
    NamedTuple,
    OrderedDict,
    TypeVar,
)

if TYPE_CHECKING:
    import mmap

    from typing_extensions import ParamSpec

    P = ParamSpec("P")
else:
    P = TypeVar("P")

T = TypeVar("T")


logger = logging.getLogger("fsspec")

Fetcher = Callable[[int, int], bytes]  # Maps (start, end) to bytes


class BaseCache:
    """Pass-through cache: doesn't keep anything, calls every time

    Acts as base class for other cachers

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    """

    name: ClassVar[str] = "none"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        self.blocksize = blocksize
        self.fetcher = fetcher
        self.size = size

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        if start is None:
            start = 0
        if stop is None:
            stop = self.size
        if start >= self.size or start >= stop:
            return b""
        return self.fetcher(start, stop)

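
# A minimal sketch of the Fetcher contract these caches wrap (in-memory
# bytes stand in for a remote read):
#
#     data = bytes(range(256))
#
#     def fetcher(start: int, end: int) -> bytes:
#         return data[start:end]
#
#     c = BaseCache(blocksize=16, fetcher=fetcher, size=len(data))
#     c._fetch(10, 20)  # == data[10:20]; pass-through, every call re-fetches
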
class MMapCache(BaseCache):
    """memory-mapped sparse file cache

    Opens temporary file, which is filled blocks-wise when data is requested.
    Ensure there is enough disc space in the temporary location.

    This cache method might only work on posix
    """

    name = "mmap"

    def __init__(
        self,
        blocksize: int,
        fetcher: Fetcher,
        size: int,
        location: str | None = None,
        blocks: set[int] | None = None,
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.blocks = set() if blocks is None else blocks
        self.location = location
        self.cache = self._makefile()

    def _makefile(self) -> mmap.mmap | bytearray:
        import mmap
        import tempfile

        if self.size == 0:
            return bytearray()

        # posix version
        if self.location is None or not os.path.exists(self.location):
            if self.location is None:
                fd = tempfile.TemporaryFile()
                self.blocks = set()
            else:
                fd = open(self.location, "wb+")
            fd.seek(self.size - 1)
            fd.write(b"1")
            fd.flush()
        else:
            fd = open(self.location, "r+b")

        return mmap.mmap(fd.fileno(), self.size)

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        logger.debug(f"MMap cache fetching {start}-{end}")
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        start_block = start // self.blocksize
        end_block = end // self.blocksize
        need = [i for i in range(start_block, end_block + 1) if i not in self.blocks]
        while need:
            # TODO: not a for loop so we can consolidate blocks later to
            # make fewer fetch calls; this could be parallel
            i = need.pop(0)
            sstart = i * self.blocksize
            send = min(sstart + self.blocksize, self.size)
            logger.debug(f"MMap get block #{i} ({sstart}-{send})")
            self.cache[sstart:send] = self.fetcher(sstart, send)
            self.blocks.add(i)

        return self.cache[start:end]

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__.copy()
        # Remove the unpicklable entries.
        del state["cache"]
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        # Restore instance attributes
        self.__dict__.update(state)
        self.cache = self._makefile()


class ReadAheadCache(BaseCache):
    """Cache which reads only when we get beyond a block of data

    This is a much simpler version of BytesCache, and does not attempt to
    fill holes in the cache or keep fragments alive. It is best suited to
    many small reads in a sequential order (e.g., reading lines from a file).
    """

    name = "readahead"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        super().__init__(blocksize, fetcher, size)
        self.cache = b""
        self.start = 0
        self.end = 0

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        if start is None:
            start = 0
        if end is None or end > self.size:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        l = end - start
        if start >= self.start and end <= self.end:
            # cache hit
            return self.cache[start - self.start : end - self.start]
        elif self.start <= start < self.end:
            # partial hit
            part = self.cache[start - self.start :]
            l -= len(part)
            start = self.end
        else:
            # miss
            part = b""
        end = min(self.size, end + self.blocksize)
        self.cache = self.fetcher(start, end)  # new block replaces old
        self.start = start
        self.end = self.start + len(self.cache)
        return part + self.cache[:l]


class FirstChunkCache(BaseCache):
    """Caches the first block of a file only

    This may be useful for file types where the metadata is stored in the header,
    but is randomly accessed.
    """

    name = "first"

    def __init__(self, blocksize: int, fetcher: Fetcher, size: int) -> None:
        super().__init__(blocksize, fetcher, size)
        self.cache: bytes | None = None

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        start = start or 0
        end = end or self.size
        if start < self.blocksize:
            if self.cache is None:
                if end > self.blocksize:
                    data = self.fetcher(0, end)
                    self.cache = data[: self.blocksize]
                    return data[start:]
                self.cache = self.fetcher(0, self.blocksize)
            part = self.cache[start:end]
            if end > self.blocksize:
                part += self.fetcher(self.blocksize, end)
            return part
        else:
            return self.fetcher(start, end)


class BlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache for. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name = "blockcache"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        self._fetch_block_cached = functools.lru_cache(maxblocks)(self._fetch_block)

    def __repr__(self) -> str:
        return (
            f"<BlockCache blocksize={self.blocksize}, "
            f"size={self.size}, nblocks={self.nblocks}>"
        )

    def cache_info(self):
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        state = self.__dict__
        del state["_fetch_block_cached"]
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        self.__dict__.update(state)
        self._fetch_block_cached = functools.lru_cache(state["maxblocks"])(
            self._fetch_block
        )

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        # byte position -> block numbers
        start_block_number = start // self.blocksize
        end_block_number = end // self.blocksize

        # these are cached, so safe to do multiple calls for the same start and end.
        for block_number in range(start_block_number, end_block_number + 1):
            self._fetch_block_cached(block_number)

        return self._read_cache(
            start,
            end,
            start_block_number=start_block_number,
            end_block_number=end_block_number,
        )

    def _fetch_block(self, block_number: int) -> bytes:
        """
        Fetch the block of data for `block_number`.
        """
        if block_number > self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        logger.info("BlockCache fetching block %d", block_number)
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize

        if start_block_number == end_block_number:
            block: bytes = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = []
            out.append(self._fetch_block_cached(start_block_number)[start_pos:])

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            for block_number in range(start_block_number + 1, end_block_number):
                out.append(self._fetch_block_cached(block_number))

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)

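
# Sketch of BlockCache behavior (hypothetical fetcher that records calls):
#
#     calls = []
#
#     def fetcher(start, end):
#         calls.append((start, end))
#         return bytes(end - start)  # zeros stand in for real data
#
#     bc = BlockCache(blocksize=64, fetcher=fetcher, size=256, maxblocks=2)
#     bc._fetch(0, 10)
#     bc._fetch(10, 20)   # same block: served from the LRU, no new request
#     calls               # == [(0, 64)]; only whole blocks are requested
#     bc.cache_info()     # hits/misses, as from functools.lru_cache
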
class BytesCache(BaseCache):
    """Cache which holds data in an in-memory bytes object

    Implements read-ahead by the block size, for semi-random reads progressing
    through the file.

    Parameters
    ----------
    trim: bool
        As we read more data, whether to discard the start of the buffer when
        we are more than a blocksize ahead of it.
    """

    name: ClassVar[str] = "bytes"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, trim: bool = True
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.cache = b""
        self.start: int | None = None
        self.end: int | None = None
        self.trim = trim

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        # TODO: only set start/end after fetch, in case it fails?
        # is this where retry logic might go?
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""
        if (
            self.start is not None
            and start >= self.start
            and self.end is not None
            and end < self.end
        ):
            # cache hit: we have all the required data
            offset = start - self.start
            return self.cache[offset : offset + end - start]

        if self.blocksize:
            bend = min(self.size, end + self.blocksize)
        else:
            bend = end

        if bend == start or start > self.size:
            return b""

        if (self.start is None or start < self.start) and (
            self.end is None or end > self.end
        ):
            # First read, or extending both before and after
            self.cache = self.fetcher(start, bend)
            self.start = start
        else:
            assert self.start is not None
            assert self.end is not None

            if start < self.start:
                if self.end is None or self.end - end > self.blocksize:
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    new = self.fetcher(start, self.start)
                    self.start = start
                    self.cache = new + self.cache
            elif self.end is not None and bend > self.end:
                if self.end > self.size:
                    pass
                elif end - self.end > self.blocksize:
                    self.cache = self.fetcher(start, bend)
                    self.start = start
                else:
                    new = self.fetcher(self.end, bend)
                    self.cache = self.cache + new

        self.end = self.start + len(self.cache)
        offset = start - self.start
        out = self.cache[offset : offset + end - start]
        if self.trim:
            num = (self.end - self.start) // (self.blocksize + 1)
            if num > 1:
                self.start += self.blocksize * num
                self.cache = self.cache[self.blocksize * num :]
        return out

    def __len__(self) -> int:
        return len(self.cache)


class AllBytes(BaseCache):
    """Cache entire contents of the file"""

    name: ClassVar[str] = "all"

    def __init__(
        self,
        blocksize: int | None = None,
        fetcher: Fetcher | None = None,
        size: int | None = None,
        data: bytes | None = None,
    ) -> None:
        super().__init__(blocksize, fetcher, size)  # type: ignore[arg-type]
        if data is None:
            data = self.fetcher(0, self.size)
        self.data = data

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        return self.data[start:stop]


class KnownPartsOfAFile(BaseCache):
    """
    Cache holding known file parts.

    Parameters
    ----------
    blocksize: int
        How far to read ahead in numbers of bytes
    fetcher: func
        Function of the form f(start, end) which gets bytes from remote as
        specified
    size: int
        How big this file is
    data: dict
        A dictionary mapping explicit `(start, stop)` file-offset tuples
        to known bytes.
    strict: bool, default True
        Whether to fetch reads that go beyond a known byte-range boundary.
        If `False`, any read that ends outside a known part will be zero
        padded. Note that zero padding will not be used for reads that
        begin outside a known byte-range.
    """

    name: ClassVar[str] = "parts"

    def __init__(
        self,
        blocksize: int,
        fetcher: Fetcher,
        size: int,
        data: dict[tuple[int, int], bytes] = {},
        strict: bool = True,
        **_: Any,
    ):
        super().__init__(blocksize, fetcher, size)
        self.strict = strict

        # simple consolidation of contiguous blocks
        if data:
            old_offsets = sorted(data.keys())
            offsets = [old_offsets[0]]
            blocks = [data.pop(old_offsets[0])]
            for start, stop in old_offsets[1:]:
                start0, stop0 = offsets[-1]
                if start == stop0:
                    offsets[-1] = (start0, stop)
                    blocks[-1] += data.pop((start, stop))
                else:
                    offsets.append((start, stop))
                    blocks.append(data.pop((start, stop)))

            self.data = dict(zip(offsets, blocks))
        else:
            self.data = data

    def _fetch(self, start: int | None, stop: int | None) -> bytes:
        if start is None:
            start = 0
        if stop is None:
            stop = self.size

        out = b""
        for (loc0, loc1), data in self.data.items():
            # If self.strict=False, use zero-padded data
            # for reads beyond the end of a "known" buffer
            if loc0 <= start < loc1:
                off = start - loc0
                out = data[off : off + stop - start]
                if not self.strict or loc0 <= stop <= loc1:
                    # The request is within a known range, or
                    # it begins within a known range, and we
                    # are allowed to pad reads beyond the
                    # buffer with zero
                    out += b"\x00" * (stop - start - len(out))
                    return out
                else:
                    # The request ends outside a known range,
                    # and we are being "strict" about reads
                    # beyond the buffer
                    start = loc1
                    break

        # We only get here if there is a request outside the
        # known parts of the file. In an ideal world, this
        # should never happen
        if self.fetcher is None:
            # We cannot fetch the data, so raise an error
            raise ValueError(f"Read is outside the known file parts: {(start, stop)}. ")
        # We can fetch the data, but should warn the user
        # that this may be slow
        warnings.warn(
            f"Read is outside the known file parts: {(start, stop)}. "
            f"IO/caching performance may be poor!"
        )
        logger.debug(f"KnownPartsOfAFile cache fetching {start}-{stop}")
        return out + super()._fetch(start, stop)

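
# Sketch of part consolidation and strict reads (offsets and bytes are
# arbitrary examples; fetcher=None makes out-of-range reads raise):
#
#     kp = KnownPartsOfAFile(
#         blocksize=0,
#         fetcher=None,
#         size=12,
#         data={(0, 4): b"abcd", (4, 8): b"efgh"},  # contiguous, so merged
#     )
#     kp.data           # == {(0, 8): b"abcdefgh"}
#     kp._fetch(2, 6)   # == b"cdef"
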
class UpdatableLRU(Generic[P, T]):
    """
    Custom implementation of LRU cache that allows updating keys

    Used by BackgroundBlockCache
    """

    class CacheInfo(NamedTuple):
        hits: int
        misses: int
        maxsize: int
        currsize: int

    def __init__(self, func: Callable[P, T], max_size: int = 128) -> None:
        self._cache: OrderedDict[Any, T] = collections.OrderedDict()
        self._func = func
        self._max_size = max_size
        self._hits = 0
        self._misses = 0
        self._lock = threading.Lock()

    def __call__(self, *args: P.args, **kwargs: P.kwargs) -> T:
        if kwargs:
            raise TypeError(f"Got unexpected keyword argument {kwargs.keys()}")
        with self._lock:
            if args in self._cache:
                self._cache.move_to_end(args)
                self._hits += 1
                return self._cache[args]

        result = self._func(*args, **kwargs)

        with self._lock:
            self._cache[args] = result
            self._misses += 1
            if len(self._cache) > self._max_size:
                self._cache.popitem(last=False)

        return result

    def is_key_cached(self, *args: Any) -> bool:
        with self._lock:
            return args in self._cache

    def add_key(self, result: T, *args: Any) -> None:
        with self._lock:
            self._cache[args] = result
            if len(self._cache) > self._max_size:
                self._cache.popitem(last=False)

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        with self._lock:
            return self.CacheInfo(
                maxsize=self._max_size,
                currsize=len(self._cache),
                hits=self._hits,
                misses=self._misses,
            )

class BackgroundBlockCache(BaseCache):
    """
    Cache holding memory as a set of blocks with pre-loading of
    the next block in the background.

    Requests are only ever made ``blocksize`` at a time, and are
    stored in an LRU cache. The least recently accessed block is
    discarded when more than ``maxblocks`` are stored. If the
    next block is not in cache, it is loaded in a separate thread
    in a non-blocking way.

    Parameters
    ----------
    blocksize : int
        The number of bytes to store in each block.
        Requests are only ever made for ``blocksize``, so this
        should balance the overhead of making a request against
        the granularity of the blocks.
    fetcher : Callable
    size : int
        The total size of the file being cached.
    maxblocks : int
        The maximum number of blocks to cache. The maximum memory
        use for this cache is then ``blocksize * maxblocks``.
    """

    name: ClassVar[str] = "background"

    def __init__(
        self, blocksize: int, fetcher: Fetcher, size: int, maxblocks: int = 32
    ) -> None:
        super().__init__(blocksize, fetcher, size)
        self.nblocks = math.ceil(size / blocksize)
        self.maxblocks = maxblocks
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, maxblocks)

        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number: int | None = None
        self._fetch_future: Future[bytes] | None = None
        self._fetch_future_lock = threading.Lock()

    def __repr__(self) -> str:
        return (
            f"<BackgroundBlockCache blocksize={self.blocksize}, "
            f"size={self.size}, nblocks={self.nblocks}>"
        )

    def cache_info(self) -> UpdatableLRU.CacheInfo:
        """
        The statistics on the block cache.

        Returns
        -------
        NamedTuple
            Returned directly from the LRU Cache used internally.
        """
        return self._fetch_block_cached.cache_info()

    def __getstate__(self) -> dict[str, Any]:
        # copy so that dropping the unpicklable members does not
        # mutate the live instance
        state = self.__dict__.copy()
        del state["_fetch_block_cached"]
        del state["_thread_executor"]
        del state["_fetch_future_block_number"]
        del state["_fetch_future"]
        del state["_fetch_future_lock"]
        return state

    def __setstate__(self, state) -> None:
        self.__dict__.update(state)
        self._fetch_block_cached = UpdatableLRU(self._fetch_block, state["maxblocks"])
        self._thread_executor = ThreadPoolExecutor(max_workers=1)
        self._fetch_future_block_number = None
        self._fetch_future = None
        self._fetch_future_lock = threading.Lock()

    def _fetch(self, start: int | None, end: int | None) -> bytes:
        if start is None:
            start = 0
        if end is None:
            end = self.size
        if start >= self.size or start >= end:
            return b""

        # byte position -> block numbers
        start_block_number = start // self.blocksize
        end_block_number = end // self.blocksize

        fetch_future_block_number = None
        fetch_future = None
        with self._fetch_future_lock:
            # Background thread is running. Check whether we can or must join it.
            if self._fetch_future is not None:
                assert self._fetch_future_block_number is not None
                if self._fetch_future.done():
                    logger.info("BlockCache joined background fetch without waiting.")
                    self._fetch_block_cached.add_key(
                        self._fetch_future.result(), self._fetch_future_block_number
                    )
                    # Cleanup the fetch variables. Done with fetching the block.
                    self._fetch_future_block_number = None
                    self._fetch_future = None
                else:
                    # Must join if we need the block for the current fetch
                    must_join = bool(
                        start_block_number
                        <= self._fetch_future_block_number
                        <= end_block_number
                    )
                    if must_join:
                        # Copy to the local variables to release lock
                        # before waiting for result
                        fetch_future_block_number = self._fetch_future_block_number
                        fetch_future = self._fetch_future

                        # Cleanup the fetch variables. Have a local copy.
                        self._fetch_future_block_number = None
                        self._fetch_future = None

        # Need to wait for the future for the current read
        if fetch_future is not None:
            logger.info("BlockCache waiting for background fetch.")
            # Wait until result and put it in cache
            self._fetch_block_cached.add_key(
                fetch_future.result(), fetch_future_block_number
            )

        # these are cached, so safe to do multiple calls for the same start and end.
        for block_number in range(start_block_number, end_block_number + 1):
            self._fetch_block_cached(block_number)

        # fetch next block in the background if nothing is running in the background,
        # the block is within file and it is not already cached
        end_block_plus_1 = end_block_number + 1
        with self._fetch_future_lock:
            if (
                self._fetch_future is None
                and end_block_plus_1 <= self.nblocks
                and not self._fetch_block_cached.is_key_cached(end_block_plus_1)
            ):
                self._fetch_future_block_number = end_block_plus_1
                self._fetch_future = self._thread_executor.submit(
                    self._fetch_block, end_block_plus_1, "async"
                )

        return self._read_cache(
            start,
            end,
            start_block_number=start_block_number,
            end_block_number=end_block_number,
        )

    def _fetch_block(self, block_number: int, log_info: str = "sync") -> bytes:
        """
        Fetch the block of data for `block_number`.
        """
        if block_number > self.nblocks:
            raise ValueError(
                f"'block_number={block_number}' is greater than "
                f"the number of blocks ({self.nblocks})"
            )

        start = block_number * self.blocksize
        end = start + self.blocksize
        logger.info("BlockCache fetching block (%s) %d", log_info, block_number)
        block_contents = super()._fetch(start, end)
        return block_contents

    def _read_cache(
        self, start: int, end: int, start_block_number: int, end_block_number: int
    ) -> bytes:
        """
        Read from our block cache.

        Parameters
        ----------
        start, end : int
            The start and end byte positions.
        start_block_number, end_block_number : int
            The start and end block numbers.
        """
        start_pos = start % self.blocksize
        end_pos = end % self.blocksize

        if start_block_number == end_block_number:
            block = self._fetch_block_cached(start_block_number)
            return block[start_pos:end_pos]

        else:
            # read from the initial
            out = []
            out.append(self._fetch_block_cached(start_block_number)[start_pos:])

            # intermediate blocks
            # Note: it'd be nice to combine these into one big request. However
            # that doesn't play nicely with our LRU cache.
            for block_number in range(start_block_number + 1, end_block_number):
                out.append(self._fetch_block_cached(block_number))

            # final block
            out.append(self._fetch_block_cached(end_block_number)[:end_pos])

            return b"".join(out)

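# Illustrative sketch (not part of fsspec): BackgroundBlockCache over an
# in-memory stand-in for a remote file. `fetcher(start, end)` must return the
# bytes in [start, end); here a slice of a local buffer plays that role.
def _demo_background_cache():
    payload = bytes(range(256)) * 4  # 1024-byte fake "remote" file
    cache = BackgroundBlockCache(
        blocksize=256, fetcher=lambda start, end: payload[start:end], size=len(payload)
    )
    assert cache._fetch(10, 20) == payload[10:20]  # block 0 read; block 1 pre-fetching
    assert cache._fetch(300, 310) == payload[300:310]  # joins the prefetch of block 1
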
caches: dict[str | None, type[BaseCache]] = {
    # one custom case
    None: BaseCache,
}


def register_cache(cls: type[BaseCache], clobber: bool = False) -> None:
    """'Register' cache implementation.

    Parameters
    ----------
    clobber: bool, optional
        If set to True (default is False), allow overwriting an existing
        entry.

    Raises
    ------
    ValueError
        If a cache with the same name is already registered and ``clobber``
        is False.
    """
    name = cls.name
    if not clobber and name in caches:
        raise ValueError(f"Cache with name {name!r} is already known: {caches[name]}")
    caches[name] = cls


for c in (
    BaseCache,
    MMapCache,
    BytesCache,
    ReadAheadCache,
    BlockCache,
    FirstChunkCache,
    AllBytes,
    KnownPartsOfAFile,
    BackgroundBlockCache,
):
    register_cache(c)
lib/python3.11/site-packages/fsspec/callbacks.py
ADDED
@@ -0,0 +1,238 @@
class Callback:
    """
    Base class and interface for callback mechanism

    This class can be used directly for monitoring file transfers by
    providing ``callback=Callback(hooks=...)`` (see the ``hooks`` argument,
    below), or subclassed for more specialised behaviour.

    Parameters
    ----------
    size: int (optional)
        Nominal quantity for the value that corresponds to a complete
        transfer, e.g., total number of tiles or total number of
        bytes
    value: int (0)
        Starting internal counter value
    hooks: dict or None
        A dict of named functions to be called on each update. The signature
        of these must be ``f(size, value, **kwargs)``
    """

    def __init__(self, size=None, value=0, hooks=None, **kwargs):
        self.size = size
        self.value = value
        self.hooks = hooks or {}
        self.kw = kwargs

    def set_size(self, size):
        """
        Set the internal maximum size attribute

        Usually called if not initially set at instantiation. Note that this
        triggers a ``call()``.

        Parameters
        ----------
        size: int
        """
        self.size = size
        self.call()

    def absolute_update(self, value):
        """
        Set the internal value state

        Triggers ``call()``

        Parameters
        ----------
        value: int
        """
        self.value = value
        self.call()

    def relative_update(self, inc=1):
        """
        Delta increment the internal counter

        Triggers ``call()``

        Parameters
        ----------
        inc: int
        """
        self.value += inc
        self.call()

    def call(self, hook_name=None, **kwargs):
        """
        Execute hook(s) with current state

        Each function is passed the internal size and current value

        Parameters
        ----------
        hook_name: str or None
            If given, execute on this hook
        kwargs: passed on to (all) hook(s)
        """
        if not self.hooks:
            return
        kw = self.kw.copy()
        kw.update(kwargs)
        if hook_name:
            if hook_name not in self.hooks:
                return
            return self.hooks[hook_name](self.size, self.value, **kw)
        for hook in self.hooks.values() or []:
            hook(self.size, self.value, **kw)

    def wrap(self, iterable):
        """
        Wrap an iterable to call ``relative_update`` on each iteration

        Parameters
        ----------
        iterable: Iterable
            The iterable that is being wrapped
        """
        for item in iterable:
            self.relative_update()
            yield item

    def branch(self, path_1, path_2, kwargs):
        """
        Set callbacks for child transfers

        Called when this callback is operating at a higher level, e.g.,
        ``put``, which may trigger transfers that can also be monitored.
        The passed kwargs are to be *mutated* to add ``callback=``, if this
        class supports branching to children.

        Parameters
        ----------
        path_1: str
            Child's source path
        path_2: str
            Child's destination path
        kwargs: dict
            arguments passed to child method, e.g., put_file.

        Returns
        -------

        """
        return None

    def no_op(self, *_, **__):
        pass

    def __getattr__(self, item):
        """
        If undefined methods are called on this class, nothing happens
        """
        return self.no_op

    @classmethod
    def as_callback(cls, maybe_callback=None):
        """Transform callback=... into Callback instance

        For the special value of ``None``, return the global instance of
        ``NoOpCallback``. This is an alternative to including
        ``callback=_DEFAULT_CALLBACK`` directly in a method signature.
        """
        if maybe_callback is None:
            return _DEFAULT_CALLBACK
        return maybe_callback


class NoOpCallback(Callback):
    """
    This implementation of Callback does exactly nothing
    """

    def call(self, *args, **kwargs):
        return None


class DotPrinterCallback(Callback):
    """
    Simple example Callback implementation

    Almost identical to Callback with a hook that prints a char; here we
    demonstrate how the outer layer may print "#" and the inner layer "."
    """

    def __init__(self, chr_to_print="#", **kwargs):
        self.chr = chr_to_print
        super().__init__(**kwargs)

    def branch(self, path_1, path_2, kwargs):
        """Mutate kwargs to add new instance with different print char"""
        kwargs["callback"] = DotPrinterCallback(".")

    def call(self, **kwargs):
        """Just outputs a character"""
        print(self.chr, end="")


class TqdmCallback(Callback):
    """
    A callback to display a progress bar using tqdm

    Parameters
    ----------
    tqdm_kwargs : dict, (optional)
        Any argument accepted by the tqdm constructor.
        See the `tqdm doc <https://tqdm.github.io/docs/tqdm/#__init__>`_.
        Will be forwarded to tqdm.

    Examples
    --------
    >>> import fsspec
    >>> from fsspec.callbacks import TqdmCallback
    >>> fs = fsspec.filesystem("memory")
    >>> path2distant_data = "/your-path"
    >>> fs.upload(
            ".",
            path2distant_data,
            recursive=True,
            callback=TqdmCallback(),
        )

    You can forward args to tqdm using the ``tqdm_kwargs`` parameter.

    >>> fs.upload(
            ".",
            path2distant_data,
            recursive=True,
            callback=TqdmCallback(tqdm_kwargs={"desc": "Your tqdm description"}),
        )
    """

    def __init__(self, tqdm_kwargs=None, *args, **kwargs):
        try:
            import tqdm

            self._tqdm = tqdm
        except ImportError as exce:
            raise ImportError(
                "Using TqdmCallback requires tqdm to be installed"
            ) from exce

        self._tqdm_kwargs = tqdm_kwargs or {}
        super().__init__(*args, **kwargs)

    def set_size(self, size):
        self.tqdm = self._tqdm.tqdm(total=size, **self._tqdm_kwargs)

    def relative_update(self, inc=1):
        self.tqdm.update(inc)

    def __del__(self):
        self.tqdm.close()
        self.tqdm = None


_DEFAULT_CALLBACK = NoOpCallback()
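
Because a hook is just a function of ``(size, value, **kwargs)``, basic progress reporting needs no subclass. A sketch (the hook name "report" is arbitrary):

    from fsspec.callbacks import Callback

    def report(size, value, **kwargs):
        print(f"{value}/{size}")

    cb = Callback(size=3, hooks={"report": report})
    for _ in cb.wrap(["a", "b", "c"]):
        pass  # prints 1/3, 2/3, 3/3 as the iterable is consumed
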
lib/python3.11/site-packages/fsspec/compression.py
ADDED
@@ -0,0 +1,174 @@
"""Helper functions for a standard streaming compression API"""
from zipfile import ZipFile

import fsspec.utils
from fsspec.spec import AbstractBufferedFile


def noop_file(file, mode, **kwargs):
    return file


# TODO: files should also be available as contexts
# should be functions of the form func(infile, mode=, **kwargs) -> file-like
compr = {None: noop_file}


def register_compression(name, callback, extensions, force=False):
    """Register an "inferable" file compression type.

    Registers transparent file compression type for use with fsspec.open.
    Compression can be specified by name in open, or "infer"-ed for any files
    ending with the given extensions.

    Args:
        name: (str) The compression type name. Eg. "gzip".
        callback: A callable of form (infile, mode, **kwargs) -> file-like.
            Accepts an input file-like object, the target mode and kwargs.
            Returns a wrapped file-like object.
        extensions: (str, Iterable[str]) A file extension, or list of file
            extensions for which to infer this compression scheme. Eg. "gz".
        force: (bool) Force re-registration of compression type or extensions.

    Raises:
        ValueError: If name or extensions already registered, and not force.

    """
    if isinstance(extensions, str):
        extensions = [extensions]

    # Validate registration
    if name in compr and not force:
        raise ValueError(f"Duplicate compression registration: {name}")

    for ext in extensions:
        if ext in fsspec.utils.compressions and not force:
            raise ValueError(f"Duplicate compression file extension: {ext} ({name})")

    compr[name] = callback

    for ext in extensions:
        fsspec.utils.compressions[ext] = name


def unzip(infile, mode="rb", filename=None, **kwargs):
    if "r" not in mode:
        filename = filename or "file"
        z = ZipFile(infile, mode="w", **kwargs)
        fo = z.open(filename, mode="w")
        fo.close = lambda closer=fo.close: closer() or z.close()
        return fo
    z = ZipFile(infile)
    if filename is None:
        filename = z.namelist()[0]
    return z.open(filename, mode="r", **kwargs)


register_compression("zip", unzip, "zip")

try:
    from bz2 import BZ2File
except ImportError:
    pass
else:
    register_compression("bz2", BZ2File, "bz2")

try:  # pragma: no cover
    from isal import igzip

    def isal(infile, mode="rb", **kwargs):
        return igzip.IGzipFile(fileobj=infile, mode=mode, **kwargs)

    register_compression("gzip", isal, "gz")
except ImportError:
    from gzip import GzipFile

    register_compression(
        "gzip", lambda f, **kwargs: GzipFile(fileobj=f, **kwargs), "gz"
    )

try:
    from lzma import LZMAFile

    register_compression("lzma", LZMAFile, "xz")
    register_compression("xz", LZMAFile, "xz", force=True)
except ImportError:
    pass

try:
    import lzmaffi

    register_compression("lzma", lzmaffi.LZMAFile, "xz", force=True)
    register_compression("xz", lzmaffi.LZMAFile, "xz", force=True)
except ImportError:
    pass


class SnappyFile(AbstractBufferedFile):
    def __init__(self, infile, mode, **kwargs):
        import snappy

        super().__init__(
            fs=None, path="snappy", mode=mode.strip("b") + "b", size=999999999, **kwargs
        )
        self.infile = infile
        if "r" in mode:
            self.codec = snappy.StreamDecompressor()
        else:
            self.codec = snappy.StreamCompressor()

    def _upload_chunk(self, final=False):
        self.buffer.seek(0)
        out = self.codec.add_chunk(self.buffer.read())
        self.infile.write(out)
        return True

    def seek(self, loc, whence=0):
        raise NotImplementedError("SnappyFile is not seekable")

    def seekable(self):
        return False

    def _fetch_range(self, start, end):
        """Get the specified set of bytes from remote"""
        data = self.infile.read(end - start)
        return self.codec.decompress(data)


try:
    import snappy

    snappy.compress
    # Snappy may use the .sz file extension, but this is not part of the
    # standard implementation.
    register_compression("snappy", SnappyFile, [])

except (ImportError, NameError, AttributeError):
    pass

try:
    import lz4.frame

    register_compression("lz4", lz4.frame.open, "lz4")
except ImportError:
    pass

try:
    import zstandard as zstd

    def zstandard_file(infile, mode="rb"):
        if "r" in mode:
            cctx = zstd.ZstdDecompressor()
            return cctx.stream_reader(infile)
        else:
            cctx = zstd.ZstdCompressor(level=10)
            return cctx.stream_writer(infile)

    register_compression("zstd", zstandard_file, "zst")
except ImportError:
    pass


def available_compressions():
    """Return a list of the implemented compressions."""
    return list(compr)
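
New codecs slot in the same way as the built-ins above. A sketch registering a pass-through codec (the "plain" name and .plain suffix are invented for illustration):

    import fsspec
    from fsspec.compression import register_compression

    register_compression("plain", lambda f, mode="rb", **kwargs: f, "plain")

    with fsspec.open("memory://demo.plain", "wb", compression="infer") as f:
        f.write(b"uncompressed bytes")  # "infer" resolves the codec from the suffix
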
lib/python3.11/site-packages/fsspec/config.py
ADDED
@@ -0,0 +1,131 @@
from __future__ import annotations

import configparser
import json
import os
import warnings
from typing import Any

conf: dict[str, dict[str, Any]] = {}
default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec")
conf_dir = os.environ.get("FSSPEC_CONFIG_DIR", default_conf_dir)


def set_conf_env(conf_dict, envdict=os.environ):
    """Set config values from environment variables

    Looks for variables of the form ``FSSPEC_<protocol>`` and
    ``FSSPEC_<protocol>_<kwarg>``. For ``FSSPEC_<protocol>`` the value is parsed
    as a json dictionary and used to ``update`` the config of the
    corresponding protocol. For ``FSSPEC_<protocol>_<kwarg>`` there is no
    attempt to convert the string value, but the kwarg keys will be lower-cased.

    The ``FSSPEC_<protocol>_<kwarg>`` variables are applied after the
    ``FSSPEC_<protocol>`` ones.

    Parameters
    ----------
    conf_dict : dict(str, dict)
        This dict will be mutated
    envdict : dict-like(str, str)
        Source for the values - usually the real environment
    """
    kwarg_keys = []
    for key in envdict:
        if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_":
            if key.count("_") > 1:
                kwarg_keys.append(key)
                continue
            try:
                value = json.loads(envdict[key])
            except json.decoder.JSONDecodeError as ex:
                warnings.warn(
                    f"Ignoring environment variable {key} due to a parse failure: {ex}"
                )
            else:
                if isinstance(value, dict):
                    _, proto = key.split("_", 1)
                    conf_dict.setdefault(proto.lower(), {}).update(value)
                else:
                    warnings.warn(
                        f"Ignoring environment variable {key} due to not being a dict:"
                        f" {type(value)}"
                    )
        elif key.startswith("FSSPEC"):
            warnings.warn(
                f"Ignoring environment variable {key} due to having an unexpected name"
            )

    for key in kwarg_keys:
        _, proto, kwarg = key.split("_", 2)
        conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key]


def set_conf_files(cdir, conf_dict):
    """Set config values from files

    Scans for INI and JSON files in the given directory, and uses their
    contents to set the config. In case of repeated values, later values
    win.

    In the case of INI files, all values are strings, and these will not
    be converted.

    Parameters
    ----------
    cdir : str
        Directory to search
    conf_dict : dict(str, dict)
        This dict will be mutated
    """
    if not os.path.isdir(cdir):
        return
    allfiles = sorted(os.listdir(cdir))
    for fn in allfiles:
        if fn.endswith(".ini"):
            ini = configparser.ConfigParser()
            ini.read(os.path.join(cdir, fn))
            for key in ini:
                if key == "DEFAULT":
                    continue
                conf_dict.setdefault(key, {}).update(dict(ini[key]))
        if fn.endswith(".json"):
            with open(os.path.join(cdir, fn)) as f:
                js = json.load(f)
            for key in js:
                conf_dict.setdefault(key, {}).update(dict(js[key]))


def apply_config(cls, kwargs, conf_dict=None):
    """Supply default values for kwargs when instantiating class

    Augments the passed kwargs, by finding entries in the config dict
    which match the class's ``.protocol`` attribute (one or more str)

    Parameters
    ----------
    cls : file system implementation
    kwargs : dict
    conf_dict : dict of dict
        Typically this is the global configuration

    Returns
    -------
    dict : the modified set of kwargs
    """
    if conf_dict is None:
        conf_dict = conf
    protos = cls.protocol if isinstance(cls.protocol, (tuple, list)) else [cls.protocol]
    kw = {}
    for proto in protos:
        # default kwargs from the current state of the config
        if proto in conf_dict:
            kw.update(conf_dict[proto])
    # explicit kwargs always win
    kw.update(**kwargs)
    kwargs = kw
    return kwargs


set_conf_files(conf_dir, conf)
set_conf_env(conf)
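
Since ``envdict`` is injectable, both variable forms can be exercised without touching the real environment. A sketch (the "gcs" protocol and values are illustrative):

    from fsspec.config import set_conf_env

    conf_dict = {}
    set_conf_env(
        conf_dict,
        envdict={
            "FSSPEC_GCS": '{"project": "demo"}',  # JSON dict form, merged per protocol
            "FSSPEC_GCS_TOKEN": "anon",  # kwarg form, kept as a plain string
        },
    )
    assert conf_dict == {"gcs": {"project": "demo", "token": "anon"}}
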
lib/python3.11/site-packages/fsspec/conftest.py
ADDED
@@ -0,0 +1,55 @@
import os
import shutil
import subprocess
import sys
import time

import pytest

import fsspec
from fsspec.implementations.cached import CachingFileSystem


@pytest.fixture()
def m():
    """
    Fixture providing a memory filesystem.
    """
    m = fsspec.filesystem("memory")
    m.store.clear()
    m.pseudo_dirs.clear()
    m.pseudo_dirs.append("")
    try:
        yield m
    finally:
        m.store.clear()
        m.pseudo_dirs.clear()
        m.pseudo_dirs.append("")


@pytest.fixture
def ftp_writable(tmpdir):
    """
    Fixture providing a writable FTP filesystem.
    """
    pytest.importorskip("pyftpdlib")
    from fsspec.implementations.ftp import FTPFileSystem

    FTPFileSystem.clear_instance_cache()  # remove lingering connections
    CachingFileSystem.clear_instance_cache()
    d = str(tmpdir)
    with open(os.path.join(d, "out"), "wb") as f:
        f.write(b"hello" * 10000)
    P = subprocess.Popen(
        [sys.executable, "-m", "pyftpdlib", "-d", d, "-u", "user", "-P", "pass", "-w"]
    )
    try:
        time.sleep(1)
        yield "localhost", 2121, "user", "pass"
    finally:
        P.terminate()
        P.wait()
        try:
            shutil.rmtree(tmpdir)
        except Exception:
            pass
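
A test module in the same package picks these fixtures up by argument name. A sketch using the memory fixture (the test name is invented):

    def test_memory_roundtrip(m):
        m.pipe("/data.bin", b"abc")  # write into the freshly-cleared memory filesystem
        assert m.cat("/data.bin") == b"abc"
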
lib/python3.11/site-packages/fsspec/core.py
ADDED
@@ -0,0 +1,710 @@
from __future__ import annotations

import io
import logging
import os
import re
from glob import has_magic
from pathlib import Path

# for backwards compat, we export cache things from here too
from .caching import (  # noqa: F401
    BaseCache,
    BlockCache,
    BytesCache,
    MMapCache,
    ReadAheadCache,
    caches,
)
from .compression import compr
from .registry import filesystem, get_filesystem_class
from .utils import (
    _unstrip_protocol,
    build_name_function,
    infer_compression,
    stringify_path,
)

logger = logging.getLogger("fsspec")


class OpenFile:
    """
    File-like object to be used in a context

    Can layer (buffered) text-mode and compression over any file-systems,
    which are typically binary-only.

    These instances are safe to serialize, as the low-level file object
    is not created until invoked using ``with``.

    Parameters
    ----------
    fs: FileSystem
        The file system to use for opening the file. Should be a subclass or duck-type
        with ``fsspec.spec.AbstractFileSystem``
    path: str
        Location to open
    mode: str like 'rb', optional
        Mode of the opened file
    compression: str or None, optional
        Compression to apply
    encoding: str or None, optional
        The encoding to use if opened in text mode.
    errors: str or None, optional
        How to handle encoding errors if opened in text mode.
    newline: None or str
        Passed to TextIOWrapper in text mode, how to handle line endings.
    """

    def __init__(
        self,
        fs,
        path,
        mode="rb",
        compression=None,
        encoding=None,
        errors=None,
        newline=None,
    ):
        self.fs = fs
        self.path = path
        self.mode = mode
        self.compression = get_compression(path, compression)
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        self.fobjects = []

    def __reduce__(self):
        return (
            OpenFile,
            (
                self.fs,
                self.path,
                self.mode,
                self.compression,
                self.encoding,
                self.errors,
                self.newline,
            ),
        )

    def __repr__(self):
        return f"<OpenFile '{self.path}'>"

    def __enter__(self):
        mode = self.mode.replace("t", "").replace("b", "") + "b"

        f = self.fs.open(self.path, mode=mode)

        self.fobjects = [f]

        if self.compression is not None:
            compress = compr[self.compression]
            f = compress(f, mode=mode[0])
            self.fobjects.append(f)

        if "b" not in self.mode:
            # assume, for example, that 'r' is equivalent to 'rt' as in builtin
            f = PickleableTextIOWrapper(
                f, encoding=self.encoding, errors=self.errors, newline=self.newline
            )
            self.fobjects.append(f)

        return self.fobjects[-1]

    def __exit__(self, *args):
        self.close()

    @property
    def full_name(self):
        return _unstrip_protocol(self.path, self.fs)

    def open(self):
        """Materialise this as a real open file without context

        The OpenFile object should be explicitly closed to avoid enclosed file
        instances persisting. You must, therefore, keep a reference to the OpenFile
        during the life of the file-like it generates.
        """
        return self.__enter__()

    def close(self):
        """Close all encapsulated file objects"""
        for f in reversed(self.fobjects):
            if "r" not in self.mode and not f.closed:
                f.flush()
            f.close()
        self.fobjects.clear()

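# Illustrative sketch (not part of fsspec): because the low-level handle is
# only created in __enter__, an OpenFile can cross a pickle boundary first:
#
#     import pickle
#     import fsspec
#
#     of = fsspec.open("memory://logs/a.txt", "wt")  # an OpenFile, nothing open yet
#     of2 = pickle.loads(pickle.dumps(of))           # round-trips via __reduce__
#     with of2 as f:
#         f.write("hello")
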
class OpenFiles(list):
    """List of OpenFile instances

    Can be used in a single context, which opens and closes all of the
    contained files. Normal list access to get the elements works as
    normal.

    A special case is made for caching filesystems - the files will
    be down/uploaded together at the start or end of the context, and
    this may happen concurrently, if the target filesystem supports it.
    """

    def __init__(self, *args, mode="rb", fs=None):
        self.mode = mode
        self.fs = fs
        self.files = []
        super().__init__(*args)

    def __enter__(self):
        if self.fs is None:
            raise ValueError("Context has already been used")

        fs = self.fs
        while True:
            if hasattr(fs, "open_many"):
                # check for concurrent cache download; or set up for upload
                self.files = fs.open_many(self)
                return self.files
            if hasattr(fs, "fs") and fs.fs is not None:
                fs = fs.fs
            else:
                break
        return [s.__enter__() for s in self]

    def __exit__(self, *args):
        fs = self.fs
        [s.__exit__(*args) for s in self]
        if "r" not in self.mode:
            while True:
                if hasattr(fs, "open_many"):
                    # check for concurrent cache upload
                    fs.commit_many(self.files)
                    return
                if hasattr(fs, "fs") and fs.fs is not None:
                    fs = fs.fs
                else:
                    break

    def __getitem__(self, item):
        out = super().__getitem__(item)
        if isinstance(item, slice):
            return OpenFiles(out, mode=self.mode, fs=self.fs)
        return out

    def __repr__(self):
        return f"<List of {len(self)} OpenFile instances>"


def open_files(
    urlpath,
    mode="rb",
    compression=None,
    encoding="utf8",
    errors=None,
    name_function=None,
    num=1,
    protocol=None,
    newline=None,
    auto_mkdir=True,
    expand=True,
    **kwargs,
):
    """Given a path or paths, return a list of ``OpenFile`` objects.

    For writing, a str path must contain the "*" character, which will be filled
    in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2.

    For either reading or writing, can instead provide explicit list of paths.

    Parameters
    ----------
    urlpath: string or list
        Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
        to read from alternative filesystems. To read from multiple files you
        can pass a globstring or a list of paths, with the caveat that they
        must all have the same protocol.
    mode: 'rb', 'wt', etc.
    compression: string or None
        If given, open file using compression codec. Can either be a compression
        name (a key in ``fsspec.compression.compr``) or "infer" to guess the
        compression from the filename suffix.
    encoding: str
        For text mode only
    errors: None or str
        Passed to TextIOWrapper in text mode
    name_function: function or None
        if opening a set of files for writing, those files do not yet exist,
        so we need to generate their names by formatting the urlpath for
        each sequence number
    num: int [1]
        if writing mode, number of files we expect to create (passed to
        name_function)
    protocol: str or None
        If given, overrides the protocol found in the URL.
    newline: bytes or None
        Used for line terminator in text mode. If None, uses system default;
        if blank, uses no translation.
    auto_mkdir: bool (True)
        If in write mode, this will ensure the target directory exists before
        writing, by calling ``fs.mkdirs(exist_ok=True)``.
    expand: bool
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Examples
    --------
    >>> files = open_files('2015-*-*.csv')  # doctest: +SKIP
    >>> files = open_files(
    ...     's3://bucket/2015-*-*.csv.gz', compression='gzip'
    ... )  # doctest: +SKIP

    Returns
    -------
    An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
    be used as a single context

    Notes
    -----
    For a full list of the available protocols and the implementations that
    they map across to, see the latest online documentation:

    - For implementations built into ``fsspec`` see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
    - For implementations in separate packages see
      https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
    """
    fs, fs_token, paths = get_fs_token_paths(
        urlpath,
        mode,
        num=num,
        name_function=name_function,
        storage_options=kwargs,
        protocol=protocol,
        expand=expand,
    )
    if fs.protocol == "file":
        fs.auto_mkdir = auto_mkdir
    elif "r" not in mode and auto_mkdir:
        parents = {fs._parent(path) for path in paths}
        for parent in parents:
            try:
                fs.makedirs(parent, exist_ok=True)
            except PermissionError:
                pass
    return OpenFiles(
        [
            OpenFile(
                fs,
                path,
                mode=mode,
                compression=compression,
                encoding=encoding,
                errors=errors,
                newline=newline,
            )
            for path in paths
        ],
        mode=mode,
        fs=fs,
    )


def _un_chain(path, kwargs):
    x = re.compile(".*[^a-z]+.*")  # test for non protocol-like single word
    bits = (
        [p if "://" in p or x.match(p) else p + "://" for p in path.split("::")]
        if "::" in path
        else [path]
    )
    # [[url, protocol, kwargs], ...]
    out = []
    previous_bit = None
    kwargs = kwargs.copy()
    for bit in reversed(bits):
        protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
        cls = get_filesystem_class(protocol)
        extra_kwargs = cls._get_kwargs_from_urls(bit)
        kws = kwargs.pop(protocol, {})
        if bit is bits[0]:
            kws.update(kwargs)
        kw = dict(**extra_kwargs, **kws)
        bit = cls._strip_protocol(bit)
        if (
            protocol in {"blockcache", "filecache", "simplecache"}
            and "target_protocol" not in kw
        ):
            bit = previous_bit
        out.append((bit, protocol, kw))
        previous_bit = bit
    out = list(reversed(out))
    return out


def url_to_fs(url, **kwargs):
    """
    Turn fully-qualified and potentially chained URL into filesystem instance

    Parameters
    ----------
    url : str
        The fsspec-compatible URL
    **kwargs: dict
        Extra options that make sense to a particular storage connection, e.g.
        host, port, username, password, etc.

    Returns
    -------
    filesystem : FileSystem
        The new filesystem discovered from ``url`` and created with
        ``**kwargs``.
    urlpath : str
        The filesystem-specific URL for ``url``.
    """
    # non-FS arguments that appear in fsspec.open()
    # inspect could keep this in sync with open()'s signature
    known_kwargs = {
        "compression",
        "encoding",
        "errors",
        "expand",
        "mode",
        "name_function",
        "newline",
        "num",
    }
    kwargs = {k: v for k, v in kwargs.items() if k not in known_kwargs}
    chain = _un_chain(url, kwargs)
    inkwargs = {}
    # Reverse iterate the chain, creating a nested target_* structure
    for i, ch in enumerate(reversed(chain)):
        urls, protocol, kw = ch
        if i == len(chain) - 1:
            inkwargs = dict(**kw, **inkwargs)
            continue
        inkwargs["target_options"] = dict(**kw, **inkwargs)
        inkwargs["target_protocol"] = protocol
        inkwargs["fo"] = urls
    urlpath, protocol, _ = chain[0]
    fs = filesystem(protocol, **inkwargs)
    return fs, urlpath

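# Illustrative sketch (not part of fsspec): each "::" segment of a chained URL
# becomes one layer; the outer layer receives the inner one via target_*:
#
#     fs, path = url_to_fs("simplecache::memory://bucket/key")
#     # fs is the "simplecache" filesystem wrapping the in-memory one;
#     # `path` is the stripped path to use with that filesystem.
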
def open(
|
400 |
+
urlpath,
|
401 |
+
mode="rb",
|
402 |
+
compression=None,
|
403 |
+
encoding="utf8",
|
404 |
+
errors=None,
|
405 |
+
protocol=None,
|
406 |
+
newline=None,
|
407 |
+
**kwargs,
|
408 |
+
):
|
409 |
+
"""Given a path or paths, return one ``OpenFile`` object.
|
410 |
+
|
411 |
+
Parameters
|
412 |
+
----------
|
413 |
+
urlpath: string or list
|
414 |
+
Absolute or relative filepath. Prefix with a protocol like ``s3://``
|
415 |
+
to read from alternative filesystems. Should not include glob
|
416 |
+
character(s).
|
417 |
+
mode: 'rb', 'wt', etc.
|
418 |
+
compression: string or None
|
419 |
+
If given, open file using compression codec. Can either be a compression
|
420 |
+
name (a key in ``fsspec.compression.compr``) or "infer" to guess the
|
421 |
+
compression from the filename suffix.
|
422 |
+
encoding: str
|
423 |
+
For text mode only
|
424 |
+
errors: None or str
|
425 |
+
Passed to TextIOWrapper in text mode
|
426 |
+
protocol: str or None
|
427 |
+
If given, overrides the protocol found in the URL.
|
428 |
+
newline: bytes or None
|
429 |
+
Used for line terminator in text mode. If None, uses system default;
|
430 |
+
if blank, uses no translation.
|
431 |
+
**kwargs: dict
|
432 |
+
Extra options that make sense to a particular storage connection, e.g.
|
433 |
+
host, port, username, password, etc.
|
434 |
+
|
435 |
+
Examples
|
436 |
+
--------
|
437 |
+
>>> openfile = open('2015-01-01.csv') # doctest: +SKIP
|
438 |
+
>>> openfile = open(
|
439 |
+
... 's3://bucket/2015-01-01.csv.gz', compression='gzip'
|
440 |
+
... ) # doctest: +SKIP
|
441 |
+
>>> with openfile as f:
|
442 |
+
... df = pd.read_csv(f) # doctest: +SKIP
|
443 |
+
...
|
444 |
+
|
445 |
+
Returns
|
446 |
+
-------
|
447 |
+
``OpenFile`` object.
|
448 |
+
|
449 |
+
Notes
|
450 |
+
-----
|
451 |
+
For a full list of the available protocols and the implementations that
|
452 |
+
they map across to see the latest online documentation:
|
453 |
+
|
454 |
+
- For implementations built into ``fsspec`` see
|
455 |
+
https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
|
456 |
+
- For implementations in separate packages see
|
457 |
+
https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
|
458 |
+
"""
|
459 |
+
out = open_files(
|
460 |
+
urlpath=[urlpath],
|
461 |
+
mode=mode,
|
462 |
+
compression=compression,
|
463 |
+
encoding=encoding,
|
464 |
+
errors=errors,
|
465 |
+
protocol=protocol,
|
466 |
+
newline=newline,
|
467 |
+
expand=False,
|
468 |
+
**kwargs,
|
469 |
+
)
|
470 |
+
if not out:
|
471 |
+
raise FileNotFoundError(urlpath)
|
472 |
+
return out[0]
|
473 |
+
|
474 |
+
|
475 |
+
def open_local(
|
476 |
+
url: str | list[str] | Path | list[Path],
|
477 |
+
mode: str = "rb",
|
478 |
+
**storage_options: dict,
|
479 |
+
) -> str | list[str]:
|
480 |
+
"""Open file(s) which can be resolved to local
|
481 |
+
|
482 |
+
For files which either are local, or get downloaded upon open
|
483 |
+
(e.g., by file caching)
|
484 |
+
|
485 |
+
Parameters
|
486 |
+
----------
|
487 |
+
url: str or list(str)
|
488 |
+
mode: str
|
489 |
+
Must be read mode
|
490 |
+
storage_options:
|
491 |
+
passed on to FS for or used by open_files (e.g., compression)
|
492 |
+
"""
|
493 |
+
if "r" not in mode:
|
494 |
+
raise ValueError("Can only ensure local files when reading")
|
495 |
+
of = open_files(url, mode=mode, **storage_options)
|
496 |
+
if not getattr(of[0].fs, "local_file", False):
|
497 |
+
raise ValueError(
|
498 |
+
"open_local can only be used on a filesystem which"
|
499 |
+
" has attribute local_file=True"
|
500 |
+
)
|
501 |
+
with of as files:
|
502 |
+
paths = [f.name for f in files]
|
503 |
+
if (isinstance(url, str) and not has_magic(url)) or isinstance(url, Path):
|
504 |
+
return paths[0]
|
505 |
+
return paths
|
506 |
+
|
507 |
+
|
508 |
+
def get_compression(urlpath, compression):
|
509 |
+
if compression == "infer":
|
510 |
+
compression = infer_compression(urlpath)
|
511 |
+
if compression is not None and compression not in compr:
|
512 |
+
raise ValueError(f"Compression type {compression} not supported")
|
513 |
+
return compression
|
514 |
+
|
515 |
+
|
516 |
+
def split_protocol(urlpath):
|
517 |
+
"""Return protocol, path pair"""
|
518 |
+
urlpath = stringify_path(urlpath)
|
519 |
+
if "://" in urlpath:
|
520 |
+
protocol, path = urlpath.split("://", 1)
|
521 |
+
if len(protocol) > 1:
|
522 |
+
# excludes Windows paths
|
523 |
+
return protocol, path
|
524 |
+
if urlpath.startswith("data:"):
|
525 |
+
return urlpath.split(":", 1)
|
526 |
+
return None, urlpath
|
527 |
+
|
528 |
+
|
529 |
+
def strip_protocol(urlpath):
|
530 |
+
"""Return only path part of full URL, according to appropriate backend"""
|
531 |
+
protocol, _ = split_protocol(urlpath)
|
532 |
+
cls = get_filesystem_class(protocol)
|
533 |
+
return cls._strip_protocol(urlpath)
|
534 |
+
|
535 |
+
|
536 |
+
def expand_paths_if_needed(paths, mode, num, fs, name_function):
|
537 |
+
"""Expand paths if they have a ``*`` in them (write mode) or any of ``*?[]``
|
538 |
+
in them (read mode).
|
539 |
+
|
540 |
+
:param paths: list of paths
|
541 |
+
mode: str
|
542 |
+
Mode in which to open files.
|
543 |
+
num: int
|
544 |
+
If opening in writing mode, number of files we expect to create.
|
545 |
+
fs: filesystem object
|
546 |
+
name_function: callable
|
547 |
+
If opening in writing mode, this callable is used to generate path
|
548 |
+
names. Names are generated for each partition by
|
549 |
+
``urlpath.replace('*', name_function(partition_index))``.
|
550 |
+
:return: list of paths
|
551 |
+
"""
|
552 |
+
expanded_paths = []
|
553 |
+
paths = list(paths)
|
554 |
+
|
555 |
+
if "w" in mode: # read mode
|
556 |
+
if sum([1 for p in paths if "*" in p]) > 1:
|
557 |
+
raise ValueError(
|
558 |
+
"When writing data, only one filename mask can be specified."
|
559 |
+
)
|
560 |
+
num = max(num, len(paths))
|
561 |
+
|
562 |
+
for curr_path in paths:
|
563 |
+
if "*" in curr_path:
|
564 |
+
# expand using name_function
|
565 |
+
expanded_paths.extend(_expand_paths(curr_path, name_function, num))
|
566 |
+
else:
|
567 |
+
expanded_paths.append(curr_path)
|
568 |
+
# if we generated more paths that asked for, trim the list
|
569 |
+
if len(expanded_paths) > num:
|
570 |
+
expanded_paths = expanded_paths[:num]
|
571 |
+
|
572 |
+
else: # read mode
|
573 |
+
for curr_path in paths:
|
574 |
+
if has_magic(curr_path):
|
575 |
+
# expand using glob
|
576 |
+
expanded_paths.extend(fs.glob(curr_path))
|
577 |
+
else:
|
578 |
+
expanded_paths.append(curr_path)
|
579 |
+
|
580 |
+
return expanded_paths
|
581 |
+
|
582 |
+
|
583 |
+
def get_fs_token_paths(
|
584 |
+
urlpath,
|
585 |
+
mode="rb",
|
586 |
+
num=1,
|
587 |
+
name_function=None,
|
588 |
+
storage_options=None,
|
589 |
+
protocol=None,
|
590 |
+
expand=True,
|
591 |
+
):
|
592 |
+
"""Filesystem, deterministic token, and paths from a urlpath and options.
|
593 |
+
|
594 |
+
Parameters
|
595 |
+
----------
|
596 |
+
urlpath: string or iterable
|
597 |
+
Absolute or relative filepath, URL (may include protocols like
|
598 |
+
``s3://``), or globstring pointing to data.
|
599 |
+
mode: str, optional
|
600 |
+
Mode in which to open files.
|
601 |
+
num: int, optional
|
602 |
+
If opening in writing mode, number of files we expect to create.
|
603 |
+
name_function: callable, optional
|
604 |
+
If opening in writing mode, this callable is used to generate path
|
605 |
+
names. Names are generated for each partition by
|
606 |
+
``urlpath.replace('*', name_function(partition_index))``.
|
607 |
+
storage_options: dict, optional
|
608 |
+
Additional keywords to pass to the filesystem class.
|
609 |
+
protocol: str or None
|
610 |
+
To override the protocol specifier in the URL
|
611 |
+
expand: bool
|
612 |
+
Expand string paths for writing, assuming the path is a directory
|
613 |
+
"""
|
614 |
+
if isinstance(urlpath, (list, tuple, set)):
|
615 |
+
if not urlpath:
|
616 |
+
raise ValueError("empty urlpath sequence")
|
617 |
+
urlpath0 = stringify_path(list(urlpath)[0])
|
618 |
+
else:
|
619 |
+
urlpath0 = stringify_path(urlpath)
|
620 |
+
storage_options = storage_options or {}
|
621 |
+
if protocol:
|
622 |
+
storage_options["protocol"] = protocol
|
623 |
+
chain = _un_chain(urlpath0, storage_options or {})
|
624 |
+
inkwargs = {}
|
625 |
+
# Reverse iterate the chain, creating a nested target_* structure
|
626 |
+
for i, ch in enumerate(reversed(chain)):
|
627 |
+
urls, nested_protocol, kw = ch
|
628 |
+
if i == len(chain) - 1:
|
629 |
+
inkwargs = dict(**kw, **inkwargs)
|
630 |
+
continue
|
631 |
+
inkwargs["target_options"] = dict(**kw, **inkwargs)
|
632 |
+
inkwargs["target_protocol"] = nested_protocol
|
633 |
+
inkwargs["fo"] = urls
|
634 |
+
paths, protocol, _ = chain[0]
|
635 |
+
fs = filesystem(protocol, **inkwargs)
|
636 |
+
if isinstance(urlpath, (list, tuple, set)):
|
637 |
+
pchains = [
|
638 |
+
_un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath
|
639 |
+
]
|
640 |
+
if len({pc[1] for pc in pchains}) > 1:
|
641 |
+
raise ValueError("Protocol mismatch getting fs from %s", urlpath)
|
642 |
+
paths = [pc[0] for pc in pchains]
|
643 |
+
else:
|
644 |
+
paths = fs._strip_protocol(paths)
|
645 |
+
if isinstance(paths, (list, tuple, set)):
|
646 |
+
paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
|
647 |
+
else:
|
648 |
+
if "w" in mode and expand:
|
649 |
+
paths = _expand_paths(paths, name_function, num)
|
650 |
+
elif "x" in mode and expand:
|
651 |
+
paths = _expand_paths(paths, name_function, num)
|
652 |
+
elif "*" in paths:
|
653 |
+
paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
|
654 |
+
else:
|
655 |
+
paths = [paths]
|
656 |
+
|
657 |
+
return fs, fs._fs_token, paths
|
658 |
+
|
659 |
+
|
660 |
+
def _expand_paths(path, name_function, num):
    if isinstance(path, str):
        if path.count("*") > 1:
            raise ValueError("Output path spec must contain exactly one '*'.")
        elif "*" not in path:
            path = os.path.join(path, "*.part")

        if name_function is None:
            name_function = build_name_function(num - 1)

        paths = [path.replace("*", name_function(i)) for i in range(num)]
        if paths != sorted(paths):
            logger.warning(
                "In order to preserve order between partitions"
                " paths created with ``name_function`` should "
                "sort to partition order"
            )
    elif isinstance(path, (tuple, list)):
        assert len(path) == num
        paths = list(path)
    else:
        raise ValueError(
            "Path should be either\n"
            "1. A list of paths: ['foo.json', 'bar.json', ...]\n"
            "2. A directory: 'foo/'\n"
            "3. A path with a '*' in it: 'foo.*.json'"
        )
    return paths

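To make the expansion rules concrete, a sketch of the three accepted forms (the helper is private, so this is illustrative only; all filenames hypothetical):

    from fsspec.core import _expand_paths

    _expand_paths("out/", None, 3)          # directory: '*.part' is appended
    # -> ['out/0.part', 'out/1.part', 'out/2.part']

    _expand_paths("data-*.json", lambda i: f"{i:04d}", 2)  # custom name_function
    # -> ['data-0000.json', 'data-0001.json']

    _expand_paths(["a.json", "b.json"], None, 2)  # explicit list, len must equal num
    # -> ['a.json', 'b.json']
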
class PickleableTextIOWrapper(io.TextIOWrapper):
    """TextIOWrapper cannot be pickled. This solves it.

    Requires that ``buffer`` be pickleable, which all instances of
    AbstractBufferedFile are.
    """

    def __init__(
        self,
        buffer,
        encoding=None,
        errors=None,
        newline=None,
        line_buffering=False,
        write_through=False,
    ):
        self.args = buffer, encoding, errors, newline, line_buffering, write_through
        super().__init__(*self.args)

    def __reduce__(self):
        return PickleableTextIOWrapper, self.args
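The ``__reduce__`` trick above simply replays the constructor arguments on unpickling. A minimal round-trip sketch; ``_ReplayBuffer`` is a toy stand-in for the pickleable AbstractBufferedFile instances the docstring refers to:

    import io
    import pickle

    from fsspec.core import PickleableTextIOWrapper

    class _ReplayBuffer(io.BytesIO):
        """Toy pickleable buffer: reconstructs itself from its contents."""
        def __reduce__(self):
            return _ReplayBuffer, (self.getvalue(),)

    wrapped = PickleableTextIOWrapper(_ReplayBuffer(b"hello\n"), encoding="utf-8")
    clone = pickle.loads(pickle.dumps(wrapped))
    assert clone.readline() == "hello\n"
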
lib/python3.11/site-packages/fsspec/dircache.py
ADDED
@@ -0,0 +1,98 @@
import time
from collections.abc import MutableMapping
from functools import lru_cache


class DirCache(MutableMapping):
    """
    Caching of directory listings, in a structure like::

        {"path0": [
            {"name": "path0/file0",
             "size": 123,
             "type": "file",
             ...
            },
            {"name": "path0/file1",
            },
            ...
            ],
         "path1": [...]
        }

    Parameters to this class control listing expiry or indeed turn
    caching off
    """

    def __init__(
        self,
        use_listings_cache=True,
        listings_expiry_time=None,
        max_paths=None,
        **kwargs,
    ):
        """

        Parameters
        ----------
        use_listings_cache: bool
            If False, this cache never returns items, but always reports KeyError,
            and setting items has no effect
        listings_expiry_time: int or float (optional)
            Time in seconds that a listing is considered valid. If None,
            listings do not expire.
        max_paths: int (optional)
            The number of most recent listings that are considered valid; 'recent'
            refers to when the entry was set.
        """
        self._cache = {}
        self._times = {}
        if max_paths:
            self._q = lru_cache(max_paths + 1)(lambda key: self._cache.pop(key, None))
        self.use_listings_cache = use_listings_cache
        self.listings_expiry_time = listings_expiry_time
        self.max_paths = max_paths

    def __getitem__(self, item):
        if self.listings_expiry_time is not None:
            if self._times.get(item, 0) - time.time() < -self.listings_expiry_time:
                del self._cache[item]
        if self.max_paths:
            self._q(item)
        return self._cache[item]  # maybe raises KeyError

    def clear(self):
        self._cache.clear()

    def __len__(self):
        return len(self._cache)

    def __contains__(self, item):
        try:
            self[item]
            return True
        except KeyError:
            return False

    def __setitem__(self, key, value):
        if not self.use_listings_cache:
            return
        if self.max_paths:
            self._q(key)
        self._cache[key] = value
        if self.listings_expiry_time is not None:
            self._times[key] = time.time()

    def __delitem__(self, key):
        del self._cache[key]

    def __iter__(self):
        entries = list(self._cache)

        return (k for k in entries if k in self)

    def __reduce__(self):
        return (
            DirCache,
            (self.use_listings_cache, self.listings_expiry_time, self.max_paths),
        )
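A minimal sketch of the expiry behaviour, with hypothetical listing data: an entry stays valid for ``listings_expiry_time`` seconds after it is set, after which the next lookup deletes it and raises KeyError:

    import time
    from fsspec.dircache import DirCache

    cache = DirCache(listings_expiry_time=0.5)
    cache["bucket/prefix"] = [{"name": "bucket/prefix/key", "size": 1, "type": "file"}]
    assert "bucket/prefix" in cache

    time.sleep(0.6)                      # now older than listings_expiry_time
    assert "bucket/prefix" not in cache  # lookup of the expired entry raises KeyError

Note how the ``max_paths`` bound is enforced lazily, as a reading of the code suggests: the ``lru_cache`` wrapper memoizes ``None`` per key on every get and set, and once a key has been evicted from that memo, the next access re-runs the wrapped lambda, which pops the entry out of the underlying dict.
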
lib/python3.11/site-packages/fsspec/exceptions.py
ADDED
@@ -0,0 +1,21 @@
"""
fsspec user-defined exception classes
"""
import asyncio


class BlocksizeMismatchError(ValueError):
    """
    Raised when a cached file is opened with a different blocksize than it was
    written with
    """

    ...


class FSTimeoutError(asyncio.TimeoutError):
    """
    Raised when an fsspec function call times out
    """

    ...
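Since ``FSTimeoutError`` subclasses ``asyncio.TimeoutError``, handlers written against the stdlib type also catch the fsspec-specific error; a tiny demonstration:

    import asyncio

    from fsspec.exceptions import FSTimeoutError

    try:
        raise FSTimeoutError("directory listing timed out")
    except asyncio.TimeoutError:
        # the fsspec-specific error is caught by the generic stdlib handler
        print("caught as asyncio.TimeoutError")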