import glob
import json
import os
import pickle
import subprocess
import sys
from collections import defaultdict
from pathlib import Path

import numpy as np
import pytest

import fsspec
from fsspec.implementations.ftp import FTPFileSystem
from fsspec.implementations.http import HTTPFileSystem
from fsspec.implementations.local import LocalFileSystem
from fsspec.spec import AbstractBufferedFile, AbstractFileSystem

PATHS_FOR_GLOB_TESTS = (
    {"name": "test0.json", "type": "file", "size": 100},
    {"name": "test0.yaml", "type": "file", "size": 100},
    {"name": "test0", "type": "directory", "size": 0},
    {"name": "test0/test0.json", "type": "file", "size": 100},
    {"name": "test0/test0.yaml", "type": "file", "size": 100},
    {"name": "test0/test1", "type": "directory", "size": 0},
    {"name": "test0/test1/test0.json", "type": "file", "size": 100},
    {"name": "test0/test1/test0.yaml", "type": "file", "size": 100},
    {"name": "test0/test1/test2", "type": "directory", "size": 0},
    {"name": "test0/test1/test2/test0.json", "type": "file", "size": 100},
    {"name": "test0/test1/test2/test0.yaml", "type": "file", "size": 100},
    {"name": "test0/test2", "type": "directory", "size": 0},
    {"name": "test0/test2/test0.json", "type": "file", "size": 100},
    {"name": "test0/test2/test0.yaml", "type": "file", "size": 100},
    {"name": "test0/test2/test1", "type": "directory", "size": 0},
    {"name": "test0/test2/test1/test0.json", "type": "file", "size": 100},
    {"name": "test0/test2/test1/test0.yaml", "type": "file", "size": 100},
    {"name": "test0/test2/test1/test3", "type": "directory", "size": 0},
    {"name": "test0/test2/test1/test3/test0.json", "type": "file", "size": 100},
    {"name": "test0/test2/test1/test3/test0.yaml", "type": "file", "size": 100},
    {"name": "test1.json", "type": "file", "size": 100},
    {"name": "test1.yaml", "type": "file", "size": 100},
    {"name": "test1", "type": "directory", "size": 0},
    {"name": "test1/test0.json", "type": "file", "size": 100},
    {"name": "test1/test0.yaml", "type": "file", "size": 100},
    {"name": "test1/test0", "type": "directory", "size": 0},
    {"name": "test1/test0/test0.json", "type": "file", "size": 100},
    {"name": "test1/test0/test0.yaml", "type": "file", "size": 100},
    {"name": "special_chars", "type": "directory", "size": 0},
    {"name": "special_chars/f\\oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f.oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f+oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f(oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f)oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f|oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f^oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f$oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f{oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f}oo.txt", "type": "file", "size": 100},
)
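
# The directory layout above is used in two ways: it seeds the in-memory
# DummyTestFS (via the glob_fs fixture below) and it is materialised on the
# local filesystem (via glob_files_folder), so fsspec's glob results can be
# cross-checked against Python's glob module and bash's globstar expansion.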
GLOB_POSIX_TESTS = {
    "argnames": ("path", "expected"),
    "argvalues": [
        ("nonexistent", []),
        ("test0.json", ["test0.json"]),
        ("test0", ["test0"]),
        ("test0/", ["test0"]),
        ("test1/test0.yaml", ["test1/test0.yaml"]),
        ("test0/test[1-2]", ["test0/test1", "test0/test2"]),
        ("test0/test[1-2]/", ["test0/test1", "test0/test2"]),
        (
            "test0/test[1-2]/*",
            [
                "test0/test1/test0.json",
                "test0/test1/test0.yaml",
                "test0/test1/test2",
                "test0/test2/test0.json",
                "test0/test2/test0.yaml",
                "test0/test2/test1",
            ],
        ),
        (
            "test0/test[1-2]/*.[j]*",
            ["test0/test1/test0.json", "test0/test2/test0.json"],
        ),
        ("special_chars/f\\oo.*", ["special_chars/f\\oo.txt"]),
        ("special_chars/f.oo.*", ["special_chars/f.oo.txt"]),
        ("special_chars/f+oo.*", ["special_chars/f+oo.txt"]),
        ("special_chars/f(oo.*", ["special_chars/f(oo.txt"]),
        ("special_chars/f)oo.*", ["special_chars/f)oo.txt"]),
        ("special_chars/f|oo.*", ["special_chars/f|oo.txt"]),
        ("special_chars/f^oo.*", ["special_chars/f^oo.txt"]),
        ("special_chars/f$oo.*", ["special_chars/f$oo.txt"]),
        ("special_chars/f{oo.*", ["special_chars/f{oo.txt"]),
        ("special_chars/f}oo.*", ["special_chars/f}oo.txt"]),
        (
            "*",
            [
                "special_chars",
                "test0.json",
                "test0.yaml",
                "test0",
                "test1.json",
                "test1.yaml",
                "test1",
            ],
        ),
        ("*.yaml", ["test0.yaml", "test1.yaml"]),
        (
            "**",
            [
                "special_chars",
                "special_chars/f$oo.txt",
                "special_chars/f(oo.txt",
                "special_chars/f)oo.txt",
                "special_chars/f+oo.txt",
                "special_chars/f.oo.txt",
                "special_chars/f\\oo.txt",
                "special_chars/f^oo.txt",
                "special_chars/f{oo.txt",
                "special_chars/f|oo.txt",
                "special_chars/f}oo.txt",
                "test0.json",
                "test0.yaml",
                "test0",
                "test0/test0.json",
                "test0/test0.yaml",
                "test0/test1",
                "test0/test1/test0.json",
                "test0/test1/test0.yaml",
                "test0/test1/test2",
                "test0/test1/test2/test0.json",
                "test0/test1/test2/test0.yaml",
                "test0/test2",
                "test0/test2/test0.json",
                "test0/test2/test0.yaml",
                "test0/test2/test1",
                "test0/test2/test1/test0.json",
                "test0/test2/test1/test0.yaml",
                "test0/test2/test1/test3",
                "test0/test2/test1/test3/test0.json",
                "test0/test2/test1/test3/test0.yaml",
                "test1.json",
                "test1.yaml",
                "test1",
                "test1/test0.json",
                "test1/test0.yaml",
                "test1/test0",
                "test1/test0/test0.json",
                "test1/test0/test0.yaml",
            ],
        ),
        ("*/", ["special_chars", "test0", "test1"]),
        (
            "**/",
            [
                "special_chars",
                "test0",
                "test0/test1",
                "test0/test1/test2",
                "test0/test2",
                "test0/test2/test1",
                "test0/test2/test1/test3",
                "test1",
                "test1/test0",
            ],
        ),
        ("*/*.yaml", ["test0/test0.yaml", "test1/test0.yaml"]),
        (
            "**/*.yaml",
            [
                "test0.yaml",
                "test0/test0.yaml",
                "test0/test1/test0.yaml",
                "test0/test1/test2/test0.yaml",
                "test0/test2/test0.yaml",
                "test0/test2/test1/test0.yaml",
                "test0/test2/test1/test3/test0.yaml",
                "test1.yaml",
                "test1/test0.yaml",
                "test1/test0/test0.yaml",
            ],
        ),
        (
            "*/test1/*",
            ["test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2"],
        ),
        ("*/test1/*.yaml", ["test0/test1/test0.yaml"]),
        (
            "**/test1/*",
            [
                "test0/test1/test0.json",
                "test0/test1/test0.yaml",
                "test0/test1/test2",
                "test0/test2/test1/test0.json",
                "test0/test2/test1/test0.yaml",
                "test0/test2/test1/test3",
                "test1/test0.json",
                "test1/test0.yaml",
                "test1/test0",
            ],
        ),
        (
            "**/test1/*.yaml",
            [
                "test0/test1/test0.yaml",
                "test0/test2/test1/test0.yaml",
                "test1/test0.yaml",
            ],
        ),
        ("*/test1/*/", ["test0/test1/test2"]),
        (
            "**/test1/*/",
            ["test0/test1/test2", "test0/test2/test1/test3", "test1/test0"],
        ),
        (
            "*/test1/**",
            [
                "test0/test1",
                "test0/test1/test0.json",
                "test0/test1/test0.yaml",
                "test0/test1/test2",
                "test0/test1/test2/test0.json",
                "test0/test1/test2/test0.yaml",
            ],
        ),
        (
            "**/test1/**",
            [
                "test0/test1",
                "test0/test1/test0.json",
                "test0/test1/test0.yaml",
                "test0/test1/test2",
                "test0/test1/test2/test0.json",
                "test0/test1/test2/test0.yaml",
                "test0/test2/test1",
                "test0/test2/test1/test0.json",
                "test0/test2/test1/test0.yaml",
                "test0/test2/test1/test3",
                "test0/test2/test1/test3/test0.json",
                "test0/test2/test1/test3/test0.yaml",
                "test1",
                "test1/test0.json",
                "test1/test0.yaml",
                "test1/test0",
                "test1/test0/test0.json",
                "test1/test0/test0.yaml",
            ],
        ),
        ("*/test1/**/", ["test0/test1", "test0/test1/test2"]),
        (
            "**/test1/**/",
            [
                "test0/test1",
                "test0/test1/test2",
                "test0/test2/test1",
                "test0/test2/test1/test3",
                "test1",
                "test1/test0",
            ],
        ),
        (
            "test0/*",
            ["test0/test0.json", "test0/test0.yaml", "test0/test1", "test0/test2"],
        ),
        ("test0/*.yaml", ["test0/test0.yaml"]),
        (
            "test0/**",
            [
                "test0",
                "test0/test0.json",
                "test0/test0.yaml",
                "test0/test1",
                "test0/test1/test0.json",
                "test0/test1/test0.yaml",
                "test0/test1/test2",
                "test0/test1/test2/test0.json",
                "test0/test1/test2/test0.yaml",
                "test0/test2",
                "test0/test2/test0.json",
                "test0/test2/test0.yaml",
                "test0/test2/test1",
                "test0/test2/test1/test0.json",
                "test0/test2/test1/test0.yaml",
                "test0/test2/test1/test3",
                "test0/test2/test1/test3/test0.json",
                "test0/test2/test1/test3/test0.yaml",
            ],
        ),
        ("test0/*/", ["test0/test1", "test0/test2"]),
        (
            "test0/**/",
            [
                "test0",
                "test0/test1",
                "test0/test1/test2",
                "test0/test2",
                "test0/test2/test1",
                "test0/test2/test1/test3",
            ],
        ),
        ("test0/*/*.yaml", ["test0/test1/test0.yaml", "test0/test2/test0.yaml"]),
        (
            "test0/**/*.yaml",
            [
                "test0/test0.yaml",
                "test0/test1/test0.yaml",
                "test0/test1/test2/test0.yaml",
                "test0/test2/test0.yaml",
                "test0/test2/test1/test0.yaml",
                "test0/test2/test1/test3/test0.yaml",
            ],
        ),
        (
            "test0/*/test1/*",
            [
                "test0/test2/test1/test0.json",
                "test0/test2/test1/test0.yaml",
                "test0/test2/test1/test3",
            ],
        ),
        ("test0/*/test1/*.yaml", ["test0/test2/test1/test0.yaml"]),
        (
            "test0/**/test1/*",
            [
                "test0/test1/test0.json",
                "test0/test1/test0.yaml",
                "test0/test1/test2",
                "test0/test2/test1/test0.json",
                "test0/test2/test1/test0.yaml",
                "test0/test2/test1/test3",
            ],
        ),
        (
            "test0/**/test1/*.yaml",
            ["test0/test1/test0.yaml", "test0/test2/test1/test0.yaml"],
        ),
        ("test0/*/test1/*/", ["test0/test2/test1/test3"]),
        ("test0/**/test1/*/", ["test0/test1/test2", "test0/test2/test1/test3"]),
        (
            "test0/*/test1/**",
            [
                "test0/test2/test1",
                "test0/test2/test1/test0.json",
                "test0/test2/test1/test0.yaml",
                "test0/test2/test1/test3",
                "test0/test2/test1/test3/test0.json",
                "test0/test2/test1/test3/test0.yaml",
            ],
        ),
        (
            "test0/**/test1/**",
            [
                "test0/test1",
                "test0/test1/test0.json",
                "test0/test1/test0.yaml",
                "test0/test1/test2",
                "test0/test1/test2/test0.json",
                "test0/test1/test2/test0.yaml",
                "test0/test2/test1",
                "test0/test2/test1/test0.json",
                "test0/test2/test1/test0.yaml",
                "test0/test2/test1/test3",
                "test0/test2/test1/test3/test0.json",
                "test0/test2/test1/test3/test0.yaml",
            ],
        ),
        ("test0/*/test1/**/", ["test0/test2/test1", "test0/test2/test1/test3"]),
        (
            "test0/**/test1/**/",
            [
                "test0/test1",
                "test0/test1/test2",
                "test0/test2/test1",
                "test0/test2/test1/test3",
            ],
        ),
    ],
}
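
# GLOB_POSIX_TESTS is shaped as the keyword arguments of pytest.mark.parametrize;
# the glob tests at the bottom of this module consume it via
# @pytest.mark.parametrize(**GLOB_POSIX_TESTS).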


class DummyTestFS(AbstractFileSystem):
    protocol = "mock"
    _file_class = AbstractBufferedFile
    _fs_contents = (
        {"name": "top_level", "type": "directory"},
        {"name": "top_level/second_level", "type": "directory"},
        {"name": "top_level/second_level/date=2019-10-01", "type": "directory"},
        {
            "name": "top_level/second_level/date=2019-10-01/a.parquet",
            "type": "file",
            "size": 100,
        },
        {
            "name": "top_level/second_level/date=2019-10-01/b.parquet",
            "type": "file",
            "size": 100,
        },
        {"name": "top_level/second_level/date=2019-10-02", "type": "directory"},
        {
            "name": "top_level/second_level/date=2019-10-02/a.parquet",
            "type": "file",
            "size": 100,
        },
        {"name": "top_level/second_level/date=2019-10-04", "type": "directory"},
        {
            "name": "top_level/second_level/date=2019-10-04/a.parquet",
            "type": "file",
            "size": 100,
        },
        {"name": "misc", "type": "directory"},
        {"name": "misc/foo.txt", "type": "file", "size": 100},
    )

    def __init__(self, fs_content=None, **kwargs):
        if fs_content is not None:
            self._fs_contents = fs_content
        super().__init__(**kwargs)

    def __getitem__(self, name):
        for item in self._fs_contents:
            if item["name"] == name:
                return item
        raise IndexError(f"{name} not found!")

    def ls(self, path, detail=True, refresh=True, **kwargs):
        if kwargs.pop("strip_proto", True):
            path = self._strip_protocol(path)

        files = not refresh and self._ls_from_cache(path)
        if not files:
            files = [
                file for file in self._fs_contents if path == self._parent(file["name"])
            ]
            files.sort(key=lambda file: file["name"])
            self.dircache[path.rstrip("/")] = files

        if detail:
            return files
        return [file["name"] for file in files]

    @classmethod
    def get_test_paths(cls, start_with=""):
        """Helper to return directory and file paths with no details"""
        return [
            file["name"]
            for file in cls._fs_contents
            if file["name"].startswith(start_with)
        ]

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        return self._file_class(
            self,
            path,
            mode,
            block_size,
            autocommit,
            cache_options=cache_options,
            **kwargs,
        )
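

# Illustrative sketch (not part of the test suite): DummyTestFS serves its static
# listing through the normal AbstractFileSystem machinery, e.g.
#
#     fs = DummyTestFS()
#     fs.ls("top_level", detail=False)  # -> ["top_level/second_level"]
#     fs.find("misc")                   # -> ["misc/foo.txt"]
#
# which is what the path-expansion, caching and glob tests below rely on.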


def test_expand_path(test_paths, recursive, maxdepth, expected):
    """Test a number of paths individually and then combined; every query should
    yield the same set of expanded paths."""
    test_fs = DummyTestFS()

    # test each query on its own
    for test_path in test_paths:
        paths = test_fs.expand_path(test_path, recursive=recursive, maxdepth=maxdepth)
        assert sorted(paths) == sorted(expected)

    # test all queries at once
    paths = test_fs.expand_path(
        list(test_paths), recursive=recursive, maxdepth=maxdepth
    )
    assert sorted(paths) == sorted(expected)


def test_expand_paths_with_wrong_args():
    test_fs = DummyTestFS()

    with pytest.raises(ValueError):
        test_fs.expand_path("top_level", recursive=True, maxdepth=0)

    with pytest.raises(ValueError):
        test_fs.expand_path("top_level", maxdepth=0)

    with pytest.raises(FileNotFoundError):
        test_fs.expand_path("top_level/**/second_level/date=2019-10-02", maxdepth=1)

    with pytest.raises(FileNotFoundError):
        test_fs.expand_path("nonexistent/*")


def test_find():
    """Test the .find() method against the Debian mirror over FTP and HTTPS,
    using a folder whose contents are stable."""
    filesystem, host, test_path = (
        FTPFileSystem,
        "ftp.fau.de",
        "ftp://ftp.fau.de/debian-cd/current/amd64/log/success",
    )
    test_fs = filesystem(host)
    filenames_ftp = test_fs.find(test_path)
    assert filenames_ftp

    filesystem, host, test_path = (
        HTTPFileSystem,
        "https://ftp.fau.de",
        "https://ftp.fau.de/debian-cd/current/amd64/log/success",
    )
    test_fs = filesystem()
    filenames_http = test_fs.find(test_path)

    # compare by basename: everything found over FTP should also be found over HTTP
    basenames_http = [f.rsplit("/", 1)[-1] for f in filenames_http]
    assert all(f.rsplit("/", 1)[-1] in basenames_http for f in filenames_ftp)


def test_find_details():
    test_fs = DummyTestFS()
    filenames = test_fs.find("/")
    details = test_fs.find("/", detail=True)
    for filename in filenames:
        assert details[filename] == test_fs.info(filename)


def test_find_file():
    test_fs = DummyTestFS()

    filename = "misc/foo.txt"
    assert test_fs.find(filename) == [filename]
    assert test_fs.find(filename, detail=True) == {filename: {}}
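

# AbstractFileSystem memoises instances: constructing a filesystem twice with the
# same arguments returns the same cached object until clear_instance_cache() is
# called, which is what the next two tests exercise.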


def test_cache():
    fs = DummyTestFS()
    fs2 = DummyTestFS()
    assert fs is fs2
    assert DummyTestFS.current() is fs
    assert len(fs._cache) == 1
    del fs2
    assert len(fs._cache) == 1
    del fs
    # the class keeps an internal reference, so the instance doesn't get collected
    assert len(DummyTestFS._cache) == 1

    DummyTestFS.clear_instance_cache()
    assert len(DummyTestFS._cache) == 0


def test_current():
    fs = DummyTestFS()
    fs2 = DummyTestFS(arg=1)
    assert fs is not fs2
    assert DummyTestFS.current() is fs2
    DummyTestFS()
    assert DummyTestFS.current() is fs


def test_alias():
    with pytest.warns(FutureWarning, match="add_aliases"):
        DummyTestFS(add_aliases=True)


def test_add_docs_warns():
    with pytest.warns(FutureWarning, match="add_docs"):
        AbstractFileSystem(add_docs=True)


def test_cache_options():
    fs = DummyTestFS()
    f = AbstractBufferedFile(fs, "misc/foo.txt", cache_type="bytes")
    assert f.cache.trim

    # TODO: dummy buffered file
    f = AbstractBufferedFile(
        fs, "misc/foo.txt", cache_type="bytes", cache_options={"trim": False}
    )
    assert f.cache.trim is False

    f = fs.open("misc/foo.txt", cache_type="bytes", cache_options={"trim": False})
    assert f.cache.trim is False


def test_trim_kwarg_warns():
    fs = DummyTestFS()
    with pytest.warns(FutureWarning, match="cache_options"):
        AbstractBufferedFile(fs, "misc/foo.txt", cache_type="bytes", trim=False)


def tests_file_open_error(monkeypatch):
    class InitiateError(ValueError): ...

    class UploadError(ValueError): ...

    class DummyBufferedFile(AbstractBufferedFile):
        can_initiate = False

        def _initiate_upload(self):
            if not self.can_initiate:
                raise InitiateError

        def _upload_chunk(self, final=False):
            raise UploadError

    monkeypatch.setattr(DummyTestFS, "_file_class", DummyBufferedFile)
    fs = DummyTestFS()

    with pytest.raises(InitiateError):
        with fs.open("misc/foo.txt", "wb") as stream:
            stream.write(b"hello" * stream.blocksize * 2)

    with pytest.raises(UploadError):
        with fs.open("misc/foo.txt", "wb") as stream:
            stream.can_initiate = True
            stream.write(b"hello" * stream.blocksize * 2)


def test_eq():
    fs = DummyTestFS()
    result = fs == 1
    assert result is False

    f = AbstractBufferedFile(fs, "misc/foo.txt", cache_type="bytes")
    result = f == 1
    assert result is False


def test_pickle_multiple():
    a = DummyTestFS(1)
    b = DummyTestFS(2, bar=1)

    x = pickle.dumps(a)
    y = pickle.dumps(b)

    del a, b
    DummyTestFS.clear_instance_cache()

    result = pickle.loads(x)
    assert result.storage_args == (1,)
    assert result.storage_options == {}

    result = pickle.loads(y)
    assert result.storage_args == (2,)
    assert result.storage_options == {"bar": 1}


def test_json():
    a = DummyTestFS(1)
    b = DummyTestFS(2, bar=1)

    outa = a.to_json()
    outb = b.to_json()

    assert json.loads(outb)  # is valid JSON
    assert a != b
    assert "bar" in outb

    assert DummyTestFS.from_json(outa) is a
    assert DummyTestFS.from_json(outb) is b


def test_ls_from_cache():
    fs = DummyTestFS()
    uncached_results = fs.ls("top_level/second_level/", refresh=True)

    assert fs.ls("top_level/second_level/", refresh=False) == uncached_results

    # _strip_protocol removes everything by default; for the sake of testing the
    # _ls_from_cache interface directly, we need to run one more time without that
    # call, to verify that the stripping in the client function actually works.
    assert (
        fs.ls("top_level/second_level/", refresh=False, strip_proto=False)
        == uncached_results
    )
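

# The readinto tests below take a ``dt`` argument, assumed here to be a numpy dtype
# string (e.g. "<i4") supplied via pytest parametrization elsewhere in the suite.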


def test_readinto_with_numpy(tmpdir, dt):
    store_path = str(tmpdir / "test_arr.npy")
    arr = np.arange(10, dtype=dt)
    arr.tofile(store_path)

    arr2 = np.empty_like(arr)
    with fsspec.open(store_path, "rb") as f:
        f.readinto(arr2)

    assert np.array_equal(arr, arr2)


def test_readinto_with_multibyte(ftp_writable, tmpdir, dt):
    host, port, user, pw = ftp_writable
    ftp = FTPFileSystem(host=host, port=port, username=user, password=pw)

    with ftp.open("/out", "wb") as fp:
        arr = np.arange(10, dtype=dt)
        fp.write(arr.tobytes())

    with ftp.open("/out", "rb") as fp:
        arr2 = np.empty_like(arr)
        fp.readinto(arr2)

    assert np.array_equal(arr, arr2)


class DummyOpenFS(DummyTestFS):
    blocksize = 10

    def _open(self, path, mode="rb", **kwargs):
        stream = open(path, mode)
        stream.size = os.stat(path).st_size
        return stream


class BasicCallback(fsspec.Callback):
    def __init__(self, **kwargs):
        self.events = []
        super().__init__(**kwargs)

    def set_size(self, size):
        self.events.append(("set_size", size))

    def relative_update(self, inc=1):
        self.events.append(("relative_update", inc))


def imitate_transfer(size, chunk, *, file=True):
    events = [("set_size", size)]
    events.extend(("relative_update", size // chunk) for _ in range(chunk))
    if file:
        # The trailing relative_update(0) exists because get_file/put_file have no
        # early exit: the final empty read still passes through the callback before
        # the transfer loop's condition stops it.
        events.append(("relative_update", 0))
    return events
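

# Worked example of the expected event trace: a 100-byte file copied with
# DummyOpenFS.blocksize == 10 produces
#   imitate_transfer(100, 10)
#   == [("set_size", 100)] + [("relative_update", 10)] * 10 + [("relative_update", 0)]
# while a bulk put/get of 10 files (file=False) produces
#   [("set_size", 10)] + [("relative_update", 1)] * 10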


def get_files(tmpdir, amount=10):
    src, dest, base = [], [], []
    for index in range(amount):
        src_path = tmpdir / f"src_{index}.txt"
        src_path.write_text("x" * 50, "utf-8")
        src.append(str(src_path))
        dest.append(str(tmpdir / f"dst_{index}.txt"))
        base.append(str(tmpdir / f"file_{index}.txt"))
    return src, dest, base
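

# Each source file written above is 50 bytes, so with DummyOpenFS.blocksize == 10 a
# single-file transfer corresponds to imitate_transfer(50, 5) in the branched test.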


def test_dummy_callbacks_file(tmpdir):
    fs = DummyOpenFS()
    callback = BasicCallback()
    file = tmpdir / "file.txt"
    source = tmpdir / "tmp.txt"
    destination = tmpdir / "tmp2.txt"
    size = 100

    source.write_text("x" * 100, "utf-8")

    fs.put_file(source, file, callback=callback)
    # -1 here since put_file no longer has final zero-size put
    assert callback.events == imitate_transfer(size, 10)[:-1]
    callback.events.clear()

    fs.get_file(file, destination, callback=callback)
    assert callback.events == imitate_transfer(size, 10)
    callback.events.clear()

    assert destination.read_text("utf-8") == "x" * 100


def test_dummy_callbacks_files(tmpdir):
    fs = DummyOpenFS()
    callback = BasicCallback()
    src, dest, base = get_files(tmpdir)

    fs.put(src, base, callback=callback)
    assert callback.events == imitate_transfer(10, 10, file=False)
    callback.events.clear()

    fs.get(base, dest, callback=callback)
    assert callback.events == imitate_transfer(10, 10, file=False)
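

# BranchableCallback mirrors fsspec's callback branching: for every individual file
# in a bulk get/put, branch() installs a child callback keyed by the (source,
# destination) pair, while the top-level callback keeps counting whole files.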


class BranchableCallback(BasicCallback):
    def __init__(self, source, dest=None, events=None, **kwargs):
        super().__init__(**kwargs)
        if dest:
            self.key = source, dest
        else:
            self.key = (source,)
        self.events = events or defaultdict(list)

    def branch(self, path_1, path_2, kwargs):
        from fsspec.implementations.local import make_path_posix

        path_1 = make_path_posix(path_1)
        path_2 = make_path_posix(path_2)
        kwargs["callback"] = BranchableCallback(path_1, path_2, events=self.events)

    def set_size(self, size):
        self.events[self.key].append(("set_size", size))

    def relative_update(self, inc=1):
        self.events[self.key].append(("relative_update", inc))


def test_dummy_callbacks_files_branched(tmpdir):
    fs = DummyOpenFS()
    src, dest, base = get_files(tmpdir)
    callback = BranchableCallback("top-level")

    def check_events(lpaths, rpaths):
        from fsspec.implementations.local import make_path_posix

        # materialise the keys: zip() is an iterator and is consumed twice below
        base_keys = list(zip(make_path_posix(lpaths), make_path_posix(rpaths)))
        assert set(callback.events.keys()) == {("top-level",), *base_keys}
        assert callback.events[("top-level",)] == imitate_transfer(10, 10, file=False)
        for key in base_keys:
            assert callback.events[key] == imitate_transfer(50, 5)

    fs.put(src, base, callback=callback)
    check_events(src, base)
    callback.events.clear()

    fs.get(base, dest, callback=callback)
    check_events(base, dest)
    callback.events.clear()


def _clean_paths(paths, prefix=""):
    """
    Helper to clean up path results by doing the following:
    - remove the provided prefix from all paths
    - remove trailing slashes from all paths
    - remove duplicate paths
    - sort all paths
    """
    paths_list = paths
    if isinstance(paths, dict):
        paths_list = list(paths)

    paths_list = [p.replace(prefix, "").strip("/") for p in sorted(set(paths_list))]
    if isinstance(paths, dict):
        return {p: paths[p] for p in paths_list}
    return paths_list
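

# Illustrative example (not used by the tests): cleaning local results before
# comparing them with the relative expectations in GLOB_POSIX_TESTS, e.g.
#     _clean_paths(["/base/b/", "/base/a"], prefix="/base")  # -> ["a", "b"]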


@pytest.fixture
def glob_fs():
    return DummyTestFS(fs_content=PATHS_FOR_GLOB_TESTS)


@pytest.fixture
def glob_files_folder(tmp_path):
    local_fs = LocalFileSystem(auto_mkdir=True)
    local_fake_dir = str(tmp_path)
    for path_info in PATHS_FOR_GLOB_TESTS:
        if path_info["type"] == "file":
            local_fs.touch(path=f"{local_fake_dir}/{path_info['name']}")
    return local_fake_dir


@pytest.mark.parametrize(**GLOB_POSIX_TESTS)
def test_posix_tests_python_glob(path, expected, glob_files_folder):
    """
    Check our POSIX glob expectations against Python's glob module.
    """
    os.chdir(glob_files_folder)
    python_output = glob.glob(pathname=path, recursive=True)
    assert _clean_paths(python_output, glob_files_folder) == _clean_paths(expected)


@pytest.mark.parametrize(**GLOB_POSIX_TESTS)
def test_posix_tests_bash_stat(path, expected, glob_files_folder):
    """
    Check our POSIX glob expectations against bash's globstar expansion.
    """
    try:
        subprocess.check_output(["bash", "-c", "shopt -s globstar"])
    except FileNotFoundError:
        pytest.skip("bash is not available")
    except subprocess.CalledProcessError:
        pytest.skip("globstar option is not available")

    # escape characters that are special to the shell before interpolating the path
    bash_path = (
        path.replace("\\", "\\\\")
        .replace("$", "\\$")
        .replace("(", "\\(")
        .replace(")", "\\)")
        .replace("|", "\\|")
    )
    bash_output = subprocess.run(
        [
            "bash",
            "-c",
            f"cd {glob_files_folder} && shopt -s globstar && stat -c %N {bash_path}",
        ],
        capture_output=True,
        check=False,
    )
    # drop the last element, which is always an empty string after the final newline
    bash_output = bash_output.stdout.decode("utf-8").replace("'", "").split("\n")[:-1]
    assert _clean_paths(bash_output, glob_files_folder) == _clean_paths(expected)


@pytest.mark.parametrize(**GLOB_POSIX_TESTS)
def test_glob_posix_rules(path, expected, glob_fs):
    output = glob_fs.glob(path=f"mock://{path}")
    assert _clean_paths(output) == _clean_paths(expected)

    detailed_output = glob_fs.glob(path=f"mock://{path}", detail=True)
    for name, info in _clean_paths(detailed_output).items():
        assert info == glob_fs[name]