Spaces:

jexzeb
/

DragGAN

Runtime error

App Files Files Community

DragGAN / venv /lib /python3.10 /site-packages /fsspec /core.py

jexzeb

Upload folder using huggingface_hub

b6068b4 about 1 year ago

raw

history blame contribute delete

No virus

21.6 kB

	from __future__ import absolute_import, division, print_function

	import io
	import logging
	import os
	import re
	from glob import has_magic

	# for backwards compat, we export cache things from here too
	from .caching import ( # noqa: F401
	BaseCache,
	BlockCache,
	BytesCache,
	MMapCache,
	ReadAheadCache,
	caches,
	)
	from .compression import compr
	from .registry import filesystem, get_filesystem_class
	from .utils import (
	_unstrip_protocol,
	build_name_function,
	infer_compression,
	stringify_path,
	)

	logger = logging.getLogger("fsspec")


	class OpenFile:
	"""
	File-like object to be used in a context

	Can layer (buffered) text-mode and compression over any file-system, which
	are typically binary-only.

	These instances are safe to serialize, as the low-level file object
	is not created until invoked using ``with``.

	Parameters
	----------
	fs: FileSystem
	The file system to use for opening the file. Should be a subclass or duck-type
	with ``fsspec.spec.AbstractFileSystem``
	path: str
	Location to open
	mode: str like 'rb', optional
	Mode of the opened file
	compression: str or None, optional
	Compression to apply
	encoding: str or None, optional
	The encoding to use if opened in text mode.
	errors: str or None, optional
	How to handle encoding errors if opened in text mode.
	newline: None or str
	Passed to TextIOWrapper in text mode, how to handle line endings.
	autoopen: bool
	If True, calls open() immediately. Mostly used by pickle
	pos: int
	If given and autoopen is True, seek to this location immediately
	"""

	def __init__(
	self,
	fs,
	path,
	mode="rb",
	compression=None,
	encoding=None,
	errors=None,
	newline=None,
	):
	self.fs = fs
	self.path = path
	self.mode = mode
	self.compression = get_compression(path, compression)
	self.encoding = encoding
	self.errors = errors
	self.newline = newline
	self.fobjects = []

	def __reduce__(self):
	return (
	OpenFile,
	(
	self.fs,
	self.path,
	self.mode,
	self.compression,
	self.encoding,
	self.errors,
	self.newline,
	),
	)

	def __repr__(self):
	return "<OpenFile '{}'>".format(self.path)

	def __enter__(self):
	mode = self.mode.replace("t", "").replace("b", "") + "b"

	f = self.fs.open(self.path, mode=mode)

	self.fobjects = [f]

	if self.compression is not None:
	compress = compr[self.compression]
	f = compress(f, mode=mode[0])
	self.fobjects.append(f)

	if "b" not in self.mode:
	# assume, for example, that 'r' is equivalent to 'rt' as in builtin
	f = PickleableTextIOWrapper(
	f, encoding=self.encoding, errors=self.errors, newline=self.newline
	)
	self.fobjects.append(f)

	return self.fobjects[-1]

	def __exit__(self, *args):
	self.close()

	@property
	def full_name(self):
	return _unstrip_protocol(self.path, self.fs)

	def open(self):
	"""Materialise this as a real open file without context

	The OpenFile object should be explicitly closed to avoid enclosed file
	instances persisting. You must, therefore, keep a reference to the OpenFile
	during the life of the file-like it generates.
	"""
	return self.__enter__()

	def close(self):
	"""Close all encapsulated file objects"""
	for f in reversed(self.fobjects):
	if "r" not in self.mode and not f.closed:
	f.flush()
	f.close()
	self.fobjects.clear()


	class OpenFiles(list):
	"""List of OpenFile instances

	Can be used in a single context, which opens and closes all of the
	contained files. Normal list access to get the elements works as
	normal.

	A special case is made for caching filesystems - the files will
	be down/uploaded together at the start or end of the context, and
	this may happen concurrently, if the target filesystem supports it.
	"""

	def __init__(self, *args, mode="rb", fs=None):
	self.mode = mode
	self.fs = fs
	self.files = []
	super().__init__(*args)

	def __enter__(self):
	if self.fs is None:
	raise ValueError("Context has already been used")

	fs = self.fs
	while True:
	if hasattr(fs, "open_many"):
	# check for concurrent cache download; or set up for upload
	self.files = fs.open_many(self)
	return self.files
	if hasattr(fs, "fs") and fs.fs is not None:
	fs = fs.fs
	else:
	break
	return [s.__enter__() for s in self]

	def __exit__(self, *args):
	fs = self.fs
	[s.__exit__(*args) for s in self]
	if "r" not in self.mode:
	while True:
	if hasattr(fs, "open_many"):
	# check for concurrent cache upload
	fs.commit_many(self.files)
	return
	if hasattr(fs, "fs") and fs.fs is not None:
	fs = fs.fs
	else:
	break

	def __getitem__(self, item):
	out = super().__getitem__(item)
	if isinstance(item, slice):
	return OpenFiles(out, mode=self.mode, fs=self.fs)
	return out

	def __repr__(self):
	return "<List of %s OpenFile instances>" % len(self)


	def open_files(
	urlpath,
	mode="rb",
	compression=None,
	encoding="utf8",
	errors=None,
	name_function=None,
	num=1,
	protocol=None,
	newline=None,
	auto_mkdir=True,
	expand=True,
	**kwargs,
	):
	"""Given a path or paths, return a list of ``OpenFile`` objects.

	For writing, a str path must contain the "*" character, which will be filled
	in by increasing numbers, e.g., "part*" -> "part1", "part2" if num=2.

	For either reading or writing, can instead provide explicit list of paths.

	Parameters
	----------
	urlpath: string or list
	Absolute or relative filepath(s). Prefix with a protocol like ``s3://``
	to read from alternative filesystems. To read from multiple files you
	can pass a globstring or a list of paths, with the caveat that they
	must all have the same protocol.
	mode: 'rb', 'wt', etc.
	compression: string or None
	If given, open file using compression codec. Can either be a compression
	name (a key in ``fsspec.compression.compr``) or "infer" to guess the
	compression from the filename suffix.
	encoding: str
	For text mode only
	errors: None or str
	Passed to TextIOWrapper in text mode
	name_function: function or None
	if opening a set of files for writing, those files do not yet exist,
	so we need to generate their names by formatting the urlpath for
	each sequence number
	num: int [1]
	if writing mode, number of files we expect to create (passed to
	name+function)
	protocol: str or None
	If given, overrides the protocol found in the URL.
	newline: bytes or None
	Used for line terminator in text mode. If None, uses system default;
	if blank, uses no translation.
	auto_mkdir: bool (True)
	If in write mode, this will ensure the target directory exists before
	writing, by calling ``fs.mkdirs(exist_ok=True)``.
	expand: bool
	**kwargs: dict
	Extra options that make sense to a particular storage connection, e.g.
	host, port, username, password, etc.

	Examples
	--------
	>>> files = open_files('2015--.csv') # doctest: +SKIP
	>>> files = open_files(
	... 's3://bucket/2015--.csv.gz', compression='gzip'
	... ) # doctest: +SKIP

	Returns
	-------
	An ``OpenFiles`` instance, which is a list of ``OpenFile`` objects that can
	be used as a single context

	Notes
	-----
	For a full list of the available protocols and the implementations that
	they map across to see the latest online documentation:

	- For implementations built into ``fsspec`` see
	https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
	- For implementations in separate packages see
	https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
	"""
	fs, fs_token, paths = get_fs_token_paths(
	urlpath,
	mode,
	num=num,
	name_function=name_function,
	storage_options=kwargs,
	protocol=protocol,
	expand=expand,
	)
	if "r" not in mode and auto_mkdir:
	parents = {fs._parent(path) for path in paths}
	[fs.makedirs(parent, exist_ok=True) for parent in parents]
	return OpenFiles(
	[
	OpenFile(
	fs,
	path,
	mode=mode,
	compression=compression,
	encoding=encoding,
	errors=errors,
	newline=newline,
	)
	for path in paths
	],
	mode=mode,
	fs=fs,
	)


	def _un_chain(path, kwargs):
	x = re.compile(".[^a-z]+.") # test for non protocol-like single word
	bits = (
	[p if "://" in p or x.match(p) else p + "://" for p in path.split("::")]
	if "::" in path
	else [path]
	)
	# [[url, protocol, kwargs], ...]
	out = []
	previous_bit = None
	kwargs = kwargs.copy()
	for bit in reversed(bits):
	protocol = kwargs.pop("protocol", None) or split_protocol(bit)[0] or "file"
	cls = get_filesystem_class(protocol)
	extra_kwargs = cls._get_kwargs_from_urls(bit)
	kws = kwargs.pop(protocol, {})
	if bit is bits[0]:
	kws.update(kwargs)
	kw = dict(extra_kwargs, kws)
	bit = cls._strip_protocol(bit)
	if (
	protocol in {"blockcache", "filecache", "simplecache"}
	and "target_protocol" not in kw
	):
	bit = previous_bit
	out.append((bit, protocol, kw))
	previous_bit = bit
	out = list(reversed(out))
	return out


	def url_to_fs(url, **kwargs):
	"""
	Turn fully-qualified and potentially chained URL into filesystem instance

	Parameters
	----------
	url : str
	The fsspec-compatible URL
	**kwargs: dict
	Extra options that make sense to a particular storage connection, e.g.
	host, port, username, password, etc.

	Returns
	-------
	filesystem : FileSystem
	The new filesystem discovered from ``url`` and created with
	``**kwargs``.
	urlpath : str
	The file-systems-specific URL for ``url``.
	"""
	chain = _un_chain(url, kwargs)
	inkwargs = {}
	# Reverse iterate the chain, creating a nested target_* structure
	for i, ch in enumerate(reversed(chain)):
	urls, protocol, kw = ch
	if i == len(chain) - 1:
	inkwargs = dict(kw, inkwargs)
	continue
	inkwargs["target_options"] = dict(kw, inkwargs)
	inkwargs["target_protocol"] = protocol
	inkwargs["fo"] = urls
	urlpath, protocol, _ = chain[0]
	fs = filesystem(protocol, **inkwargs)
	return fs, urlpath


	def open(
	urlpath,
	mode="rb",
	compression=None,
	encoding="utf8",
	errors=None,
	protocol=None,
	newline=None,
	**kwargs,
	):
	"""Given a path or paths, return one ``OpenFile`` object.

	Parameters
	----------
	urlpath: string or list
	Absolute or relative filepath. Prefix with a protocol like ``s3://``
	to read from alternative filesystems. Should not include glob
	character(s).
	mode: 'rb', 'wt', etc.
	compression: string or None
	If given, open file using compression codec. Can either be a compression
	name (a key in ``fsspec.compression.compr``) or "infer" to guess the
	compression from the filename suffix.
	encoding: str
	For text mode only
	errors: None or str
	Passed to TextIOWrapper in text mode
	protocol: str or None
	If given, overrides the protocol found in the URL.
	newline: bytes or None
	Used for line terminator in text mode. If None, uses system default;
	if blank, uses no translation.
	**kwargs: dict
	Extra options that make sense to a particular storage connection, e.g.
	host, port, username, password, etc.

	Examples
	--------
	>>> openfile = open('2015-01-01.csv') # doctest: +SKIP
	>>> openfile = open(
	... 's3://bucket/2015-01-01.csv.gz', compression='gzip'
	... ) # doctest: +SKIP
	>>> with openfile as f:
	... df = pd.read_csv(f) # doctest: +SKIP
	...

	Returns
	-------
	``OpenFile`` object.

	Notes
	-----
	For a full list of the available protocols and the implementations that
	they map across to see the latest online documentation:

	- For implementations built into ``fsspec`` see
	https://filesystem-spec.readthedocs.io/en/latest/api.html#built-in-implementations
	- For implementations in separate packages see
	https://filesystem-spec.readthedocs.io/en/latest/api.html#other-known-implementations
	"""
	out = open_files(
	urlpath=[urlpath],
	mode=mode,
	compression=compression,
	encoding=encoding,
	errors=errors,
	protocol=protocol,
	newline=newline,
	expand=False,
	**kwargs,
	)
	if not out:
	raise FileNotFoundError(urlpath)
	return out[0]


	def open_local(url, mode="rb", **storage_options):
	"""Open file(s) which can be resolved to local

	For files which either are local, or get downloaded upon open
	(e.g., by file caching)

	Parameters
	----------
	url: str or list(str)
	mode: str
	Must be read mode
	storage_options:
	passed on to FS for or used by open_files (e.g., compression)
	"""
	if "r" not in mode:
	raise ValueError("Can only ensure local files when reading")
	of = open_files(url, mode=mode, **storage_options)
	if not getattr(of[0].fs, "local_file", False):
	raise ValueError(
	"open_local can only be used on a filesystem which"
	" has attribute local_file=True"
	)
	with of as files:
	paths = [f.name for f in files]
	if isinstance(url, str) and not has_magic(url):
	return paths[0]
	return paths


	def get_compression(urlpath, compression):
	if compression == "infer":
	compression = infer_compression(urlpath)
	if compression is not None and compression not in compr:
	raise ValueError("Compression type %s not supported" % compression)
	return compression


	def split_protocol(urlpath):
	"""Return protocol, path pair"""
	urlpath = stringify_path(urlpath)
	if "://" in urlpath:
	protocol, path = urlpath.split("://", 1)
	if len(protocol) > 1:
	# excludes Windows paths
	return protocol, path
	return None, urlpath


	def strip_protocol(urlpath):
	"""Return only path part of full URL, according to appropriate backend"""
	protocol, _ = split_protocol(urlpath)
	cls = get_filesystem_class(protocol)
	return cls._strip_protocol(urlpath)


	def expand_paths_if_needed(paths, mode, num, fs, name_function):
	"""Expand paths if they have a ```` in them (write mode) or any of ``?[]``
	in them (read mode).

	:param paths: list of paths
	mode: str
	Mode in which to open files.
	num: int
	If opening in writing mode, number of files we expect to create.
	fs: filesystem object
	name_function: callable
	If opening in writing mode, this callable is used to generate path
	names. Names are generated for each partition by
	``urlpath.replace('*', name_function(partition_index))``.
	:return: list of paths
	"""
	expanded_paths = []
	paths = list(paths)

	if "w" in mode: # read mode
	if sum([1 for p in paths if "*" in p]) > 1:
	raise ValueError(
	"When writing data, only one filename mask can be specified."
	)
	num = max(num, len(paths))

	for curr_path in paths:
	if "*" in curr_path:
	# expand using name_function
	expanded_paths.extend(_expand_paths(curr_path, name_function, num))
	else:
	expanded_paths.append(curr_path)
	# if we generated more paths that asked for, trim the list
	if len(expanded_paths) > num:
	expanded_paths = expanded_paths[:num]

	else: # read mode
	for curr_path in paths:
	if has_magic(curr_path):
	# expand using glob
	expanded_paths.extend(fs.glob(curr_path))
	else:
	expanded_paths.append(curr_path)

	return expanded_paths


	def get_fs_token_paths(
	urlpath,
	mode="rb",
	num=1,
	name_function=None,
	storage_options=None,
	protocol=None,
	expand=True,
	):
	"""Filesystem, deterministic token, and paths from a urlpath and options.

	Parameters
	----------
	urlpath: string or iterable
	Absolute or relative filepath, URL (may include protocols like
	``s3://``), or globstring pointing to data.
	mode: str, optional
	Mode in which to open files.
	num: int, optional
	If opening in writing mode, number of files we expect to create.
	name_function: callable, optional
	If opening in writing mode, this callable is used to generate path
	names. Names are generated for each partition by
	``urlpath.replace('*', name_function(partition_index))``.
	storage_options: dict, optional
	Additional keywords to pass to the filesystem class.
	protocol: str or None
	To override the protocol specifier in the URL
	expand: bool
	Expand string paths for writing, assuming the path is a directory
	"""
	if isinstance(urlpath, (list, tuple, set)):
	if not urlpath:
	raise ValueError("empty urlpath sequence")
	urlpath0 = stringify_path(list(urlpath)[0])
	else:
	urlpath0 = stringify_path(urlpath)
	storage_options = storage_options or {}
	if protocol:
	storage_options["protocol"] = protocol
	chain = _un_chain(urlpath0, storage_options or {})
	inkwargs = {}
	# Reverse iterate the chain, creating a nested target_* structure
	for i, ch in enumerate(reversed(chain)):
	urls, nested_protocol, kw = ch
	if i == len(chain) - 1:
	inkwargs = dict(kw, inkwargs)
	continue
	inkwargs["target_options"] = dict(kw, inkwargs)
	inkwargs["target_protocol"] = nested_protocol
	inkwargs["fo"] = urls
	paths, protocol, _ = chain[0]
	fs = filesystem(protocol, **inkwargs)
	if isinstance(urlpath, (list, tuple, set)):
	pchains = [
	_un_chain(stringify_path(u), storage_options or {})[0] for u in urlpath
	]
	if len(set(pc[1] for pc in pchains)) > 1:
	raise ValueError("Protocol mismatch getting fs from %s", urlpath)
	paths = [pc[0] for pc in pchains]
	else:
	paths = fs._strip_protocol(paths)
	if isinstance(paths, (list, tuple, set)):
	paths = expand_paths_if_needed(paths, mode, num, fs, name_function)
	else:
	if "w" in mode and expand:
	paths = _expand_paths(paths, name_function, num)
	elif "*" in paths:
	paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)]
	else:
	paths = [paths]

	return fs, fs._fs_token, paths


	def _expand_paths(path, name_function, num):
	if isinstance(path, str):
	if path.count("*") > 1:
	raise ValueError("Output path spec must contain exactly one '*'.")
	elif "*" not in path:
	path = os.path.join(path, "*.part")

	if name_function is None:
	name_function = build_name_function(num - 1)

	paths = [path.replace("*", name_function(i)) for i in range(num)]
	if paths != sorted(paths):
	logger.warning(
	"In order to preserve order between partitions"
	" paths created with ``name_function`` should "
	"sort to partition order"
	)
	elif isinstance(path, (tuple, list)):
	assert len(path) == num
	paths = list(path)
	else:
	raise ValueError(
	"Path should be either\n"
	"1. A list of paths: ['foo.json', 'bar.json', ...]\n"
	"2. A directory: 'foo/\n"
	"3. A path with a '' in it: 'foo..json'"
	)
	return paths


	class PickleableTextIOWrapper(io.TextIOWrapper):
	"""TextIOWrapper cannot be pickled. This solves it.

	Requires that ``buffer`` be pickleable, which all instances of
	AbstractBufferedFile are.
	"""

	def __init__(
	self,
	buffer,
	encoding=None,
	errors=None,
	newline=None,
	line_buffering=False,
	write_through=False,
	):
	self.args = buffer, encoding, errors, newline, line_buffering, write_through
	super().__init__(*self.args)

	def __reduce__(self):
	return PickleableTextIOWrapper, self.args