|
""" |
|
This module contains the SMBFileSystem class, which handles access to
Windows and Samba network shares by using the smbprotocol package.
|
""" |
|
|
|
import datetime |
|
import uuid |
|
from stat import S_ISDIR, S_ISLNK |
|
|
|
import smbclient |
|
|
|
from .. import AbstractFileSystem |
|
from ..utils import infer_storage_options |
|
|
|
|
|
|
|
|
|
class SMBFileSystem(AbstractFileSystem):
    """Allow reading and writing to Windows and Samba network shares.

    When using `fsspec.open()` for getting a file-like object the URI
    should be specified as this format:
    ``smb://workgroup;user:password@server:port/share/folder/file.csv``.

    Example::

        >>> import fsspec
        >>> with fsspec.open(
        ...     'smb://myuser:mypassword@myserver.com/' 'share/folder/file.csv'
        ... ) as smbfile:
        ...     df = pd.read_csv(smbfile, sep='|', header=None)

    Note that you need to pass in a valid hostname or IP address for the host
    component of the URL. Do not use the Windows/NetBIOS machine name for the
    host component.

    The first component of the path in the URL points to the name of the shared
    folder. Subsequent path components will point to the directory/folder/file.

    The URL components ``workgroup``, ``user``, ``password`` and ``port`` are
    optional.

    .. note::

        This implementation requires `smbprotocol`_ to be installed, e.g.::

            $ pip install smbprotocol
            # or
            # pip install smbprotocol[kerberos]

    .. _smbprotocol: https://github.com/jborean93/smbprotocol#requirements

    Note: if using this with the ``open`` or ``open_files``, with full URLs,
    there is no way to tell if a path is relative, so all paths are assumed
    to be absolute.
    """

    protocol = "smb"

    def __init__(
        self,
        host,
        port=None,
        username=None,
        password=None,
        timeout=60,
        encrypt=None,
        share_access=None,
        **kwargs,
    ):
        """
        You can use _get_kwargs_from_urls to get some kwargs from
        a reasonable SMB url.

        Authentication will be anonymous or integrated if username/password are not
        given.

        Parameters
        ----------
        host: str
            The remote server name/ip to connect to
        port: int or None
            Port to connect with. Usually 445, sometimes 139.
            When None, 445 is used.
        username: str or None
            Username to connect with. Required if Kerberos auth is not being used.
        password: str or None
            User's password on the server, if using username
        timeout: int
            Connection timeout in seconds
        encrypt: bool
            Whether to force encryption or not, once this has been set to True
            the session cannot be changed back to False.
        share_access: str or None
            Specifies the default access applied to file open operations
            performed with this file system object.
            This affects whether other processes can concurrently open a handle
            to the same file.

            - None (the default): exclusively locks the file until closed.
            - 'r': Allow other handles to be opened with read access.
            - 'w': Allow other handles to be opened with write access.
            - 'd': Allow other handles to be opened with delete access.
        **kwargs:
            Forwarded to the base class constructor. The optional key
            ``temppath`` (default ``""``) is a path appended to the share name
            when building the temporary file location used for writes opened
            with ``autocommit=False``.
        """
        super().__init__(**kwargs)
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.timeout = timeout
        self.encrypt = encrypt
        self.temppath = kwargs.pop("temppath", "")
        self.share_access = share_access
        # The session is registered eagerly, at construction time.
        self._connect()

    @property
    def _port(self):
        """Port actually used for SMB calls: ``self.port``, or 445 when unset."""
        return 445 if self.port is None else self.port

    def _connect(self):
        """Register an smbclient session for ``self.host`` with the stored settings."""
        smbclient.register_session(
            self.host,
            username=self.username,
            password=self.password,
            port=self._port,
            encrypt=self.encrypt,
            connection_timeout=self.timeout,
        )

    @classmethod
    def _strip_protocol(cls, path):
        """Return only the ``path`` component that infer_storage_options extracts."""
        return infer_storage_options(path)["path"]

    @staticmethod
    def _get_kwargs_from_urls(path):
        """Return the options inferred from the URL, minus ``path`` and ``protocol``."""
        out = infer_storage_options(path)
        out.pop("path", None)
        out.pop("protocol", None)
        return out

    def mkdir(self, path, create_parents=True, **kwargs):
        """Create a directory at ``path``.

        With ``create_parents`` true, ``smbclient.makedirs`` is called with
        ``exist_ok=False``; otherwise ``smbclient.mkdir`` is called.
        Extra ``kwargs`` are forwarded to the smbclient call.
        """
        wpath = _as_unc_path(self.host, path)
        if create_parents:
            smbclient.makedirs(wpath, exist_ok=False, port=self._port, **kwargs)
        else:
            smbclient.mkdir(wpath, port=self._port, **kwargs)

    def makedirs(self, path, exist_ok=False):
        """Create ``path`` and its parents; a bare share path is ignored."""
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.makedirs(wpath, exist_ok=exist_ok, port=self._port)

    def rmdir(self, path):
        """Remove the directory at ``path``; a bare share path is ignored."""
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            smbclient.rmdir(wpath, port=self._port)

    def info(self, path, **kwargs):
        """Return a dict describing ``path`` built from ``smbclient.stat``.

        The result has the keys ``name``, ``size``, ``type`` (one of
        ``"directory"``, ``"link"`` or ``"file"``), ``uid``, ``gid``, ``time``
        (taken from ``st_atime``) and ``mtime``. Directory names carry exactly
        one trailing ``/``. Extra ``kwargs`` are forwarded to ``smbclient.stat``.
        """
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port, **kwargs)
        if S_ISDIR(stats.st_mode):
            stype = "directory"
        elif S_ISLNK(stats.st_mode):
            stype = "link"
        else:
            stype = "file"
        name = path
        # Avoid producing "dir//" when the caller already passed a trailing slash.
        if stype == "directory" and not path.endswith("/"):
            name = path + "/"
        res = {
            "name": name,
            "size": stats.st_size,
            "type": stype,
            "uid": stats.st_uid,
            "gid": stats.st_gid,
            "time": stats.st_atime,
            "mtime": stats.st_mtime,
        }
        return res

    def created(self, path):
        """Return the created timestamp of a file as a datetime.datetime

        The value is read from ``st_ctime`` and returned as a UTC-aware datetime.
        """
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_ctime, tz=datetime.timezone.utc)

    def modified(self, path):
        """Return the modified timestamp of a file as a datetime.datetime

        The value is read from ``st_mtime`` and returned as a UTC-aware datetime.
        """
        wpath = _as_unc_path(self.host, path)
        stats = smbclient.stat(wpath, port=self._port)
        return datetime.datetime.fromtimestamp(stats.st_mtime, tz=datetime.timezone.utc)

    def ls(self, path, detail=True, **kwargs):
        """List the entries of the directory ``path``.

        Returns the entry paths (``path`` joined with each listed name), or,
        when ``detail`` is true, one ``info`` dict per entry. Extra ``kwargs``
        are forwarded to ``smbclient.listdir``.
        """
        unc = _as_unc_path(self.host, path)
        listed = smbclient.listdir(unc, port=self._port, **kwargs)
        dirs = ["/".join([path.rstrip("/"), p]) for p in listed]
        if detail:
            dirs = [self.info(d) for d in dirs]
        return dirs

    def _open(
        self,
        path,
        mode="rb",
        block_size=-1,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        """
        Open ``path`` and return a file-like object.

        For a write mode with ``autocommit=False`` an ``SMBFileOpener`` is
        returned, which writes to a temporary file on the same share; otherwise
        the result of ``smbclient.open_file`` is returned directly.

        block_size: int or None
            If 0, no buffering, 1, line buffering, >1, buffer that many bytes.
            None or a negative value is passed on as -1.

        Notes
        -----
        By specifying 'share_access' in 'kwargs' it is possible to override the
        default shared access setting applied in the constructor of this object.
        """
        bls = block_size if block_size is not None and block_size >= 0 else -1
        wpath = _as_unc_path(self.host, path)
        share_access = kwargs.pop("share_access", self.share_access)
        if "w" in mode and autocommit is False:
            temp = _as_temp_path(self.host, path, self.temppath)
            # share_access was popped from kwargs above, so it must be passed
            # explicitly or the sharing mode would be lost for deferred writes.
            return SMBFileOpener(
                wpath,
                temp,
                mode,
                port=self._port,
                block_size=bls,
                share_access=share_access,
                **kwargs,
            )
        return smbclient.open_file(
            wpath,
            mode,
            buffering=bls,
            share_access=share_access,
            port=self._port,
            **kwargs,
        )

    def copy(self, path1, path2, **kwargs):
        """Copy within two locations in the same filesystem"""
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.copyfile(wpath1, wpath2, port=self._port, **kwargs)

    def _rm(self, path):
        """Remove the file or directory at ``path``; a bare share path is ignored."""
        if _share_has_path(path):
            wpath = _as_unc_path(self.host, path)
            stats = smbclient.stat(wpath, port=self._port)
            if S_ISDIR(stats.st_mode):
                smbclient.rmdir(wpath, port=self._port)
            else:
                smbclient.remove(wpath, port=self._port)

    def mv(self, path1, path2, recursive=None, maxdepth=None, **kwargs):
        """Rename ``path1`` to ``path2`` via ``smbclient.rename``.

        ``recursive`` and ``maxdepth`` are accepted but not used here.
        """
        wpath1 = _as_unc_path(self.host, path1)
        wpath2 = _as_unc_path(self.host, path2)
        smbclient.rename(wpath1, wpath2, port=self._port, **kwargs)
|
|
|
|
|
def _as_unc_path(host, path): |
|
rpath = path.replace("/", "\\") |
|
unc = f"\\\\{host}{rpath}" |
|
return unc |
|
|
|
|
|
def _as_temp_path(host, path, temppath):
    """Return a UNC path for a uniquely named temporary file on the share of ``path``.

    The share name is taken as the second ``/``-separated component of
    ``path``; ``temppath`` is appended to it verbatim, followed by a random
    UUID used as the file name.
    """
    share_name = path.split("/")[1]
    unique_name = uuid.uuid4()
    return _as_unc_path(host, f"/{share_name}{temppath}/{unique_name}")
|
|
|
|
|
def _share_has_path(path): |
|
parts = path.count("/") |
|
if path.endswith("/"): |
|
return parts > 2 |
|
return parts > 1 |
|
|
|
|
|
class SMBFileOpener:
    """writes to remote temporary file, move on commit

    Wraps the file object returned by ``smbclient.open_file`` for the
    temporary path. Unknown attributes are delegated to that file object.
    ``commit`` moves the temporary file over the final path; ``discard``
    removes it.
    """

    def __init__(self, path, temp, mode, port=445, block_size=-1, **kwargs):
        """
        Parameters
        ----------
        path: str
            Final destination, used by ``commit`` and ``__fspath__``.
        temp: str
            Temporary file that is actually opened and written to.
        mode: str
            Mode passed to ``smbclient.open_file``.
        port: int
            Port passed to every smbclient call.
        block_size: int
            Passed to ``smbclient.open_file`` as ``buffering``.
        **kwargs:
            Extra keyword arguments forwarded to ``smbclient.open_file``.
        """
        self.path = path
        self.temp = temp
        self.mode = mode
        self.block_size = block_size
        self.kwargs = kwargs
        self.smbfile = None
        # Tracks whether we are inside a ``with`` block; not read in this class.
        self._incontext = False
        self.port = port
        self._open()

    def _open(self):
        """Open the temporary file unless an open handle already exists."""
        if self.smbfile is None or self.smbfile.closed:
            self.smbfile = smbclient.open_file(
                self.temp,
                self.mode,
                port=self.port,
                buffering=self.block_size,
                **self.kwargs,
            )

    def commit(self):
        """Move temp file to definitive on success."""
        smbclient.replace(self.temp, self.path, port=self.port)

    def discard(self):
        """Remove the temp file on failure."""
        smbclient.remove(self.temp, port=self.port)

    def __fspath__(self):
        """Return the final destination path, not the temporary one."""
        return self.path

    def __iter__(self):
        """Iterate over the wrapped file object."""
        return self.smbfile.__iter__()

    def __getattr__(self, item):
        """Delegate attributes not found on this object to the wrapped file.

        Raises
        ------
        AttributeError
            If ``smbfile`` itself has not been set, or the wrapped file lacks
            the attribute.
        """
        # __getattr__ only runs after normal lookup failed. If "smbfile" is the
        # missing name (instance created without __init__, e.g. during
        # copy/unpickle), touching self.smbfile would re-enter this method
        # forever and end in RecursionError.
        if item == "smbfile":
            raise AttributeError(item)
        return getattr(self.smbfile, item)

    def __enter__(self):
        """Enter the wrapped file's context and return what it returns."""
        self._incontext = True
        return self.smbfile.__enter__()

    def __exit__(self, exc_type, exc_value, traceback):
        """Leave the wrapped file's context; exceptions are not suppressed here."""
        self._incontext = False
        self.smbfile.__exit__(exc_type, exc_value, traceback)
|
|