|
"""A dumb and slow but simple dbm clone. |
|
|
|
For database spam, spam.dir contains the index (a text file), |
|
spam.bak *may* contain a backup of the index (also a text file), |
|
while spam.dat contains the data (a binary file). |
|
|
|
XXX TO DO: |
|
|
|
- seems to contain a bug when updating... |
|
|
|
- reclaim free space (currently, space once occupied by deleted or expanded |
|
items is never reused) |
|
|
|
- support concurrent access (currently, if two processes take turns making |
|
updates, they can mess up the index) |
|
|
|
- support efficient access to large databases (currently, the whole index |
|
is read when the database is opened, and some updates rewrite the whole index) |
|
|
|
- support opening for read-only (flag = 'm') |
|
|
|
""" |
|
|
|
import ast as _ast |
|
import io as _io |
|
import os as _os |
|
import collections.abc |
|
|
|
__all__ = ["error", "open"] |
|
|
|
_BLOCKSIZE = 512 |
|
|
|
error = OSError |
|
|
|
class _Database(collections.abc.MutableMapping): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_os = _os |
|
_io = _io |
|
|
|
def __init__(self, filebasename, mode, flag='c'): |
|
filebasename = self._os.fsencode(filebasename) |
|
self._mode = mode |
|
self._readonly = (flag == 'r') |
|
|
|
|
|
|
|
|
|
|
|
|
|
self._dirfile = filebasename + b'.dir' |
|
|
|
|
|
|
|
|
|
|
|
self._datfile = filebasename + b'.dat' |
|
self._bakfile = filebasename + b'.bak' |
|
|
|
|
|
self._index = None |
|
|
|
|
|
self._create(flag) |
|
self._update(flag) |
|
|
|
def _create(self, flag): |
|
if flag == 'n': |
|
for filename in (self._datfile, self._bakfile, self._dirfile): |
|
try: |
|
_os.remove(filename) |
|
except OSError: |
|
pass |
|
|
|
try: |
|
f = _io.open(self._datfile, 'r', encoding="Latin-1") |
|
except OSError: |
|
if flag not in ('c', 'n'): |
|
raise |
|
with _io.open(self._datfile, 'w', encoding="Latin-1") as f: |
|
self._chmod(self._datfile) |
|
else: |
|
f.close() |
|
|
|
|
|
def _update(self, flag): |
|
self._modified = False |
|
self._index = {} |
|
try: |
|
f = _io.open(self._dirfile, 'r', encoding="Latin-1") |
|
except OSError: |
|
if flag not in ('c', 'n'): |
|
raise |
|
self._modified = True |
|
else: |
|
with f: |
|
for line in f: |
|
line = line.rstrip() |
|
key, pos_and_siz_pair = _ast.literal_eval(line) |
|
key = key.encode('Latin-1') |
|
self._index[key] = pos_and_siz_pair |
|
|
|
|
|
|
|
|
|
def _commit(self): |
|
|
|
|
|
|
|
if self._index is None or not self._modified: |
|
return |
|
|
|
try: |
|
self._os.unlink(self._bakfile) |
|
except OSError: |
|
pass |
|
|
|
try: |
|
self._os.rename(self._dirfile, self._bakfile) |
|
except OSError: |
|
pass |
|
|
|
with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f: |
|
self._chmod(self._dirfile) |
|
for key, pos_and_siz_pair in self._index.items(): |
|
|
|
|
|
entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair) |
|
f.write(entry) |
|
|
|
sync = _commit |
|
|
|
def _verify_open(self): |
|
if self._index is None: |
|
raise error('DBM object has already been closed') |
|
|
|
def __getitem__(self, key): |
|
if isinstance(key, str): |
|
key = key.encode('utf-8') |
|
self._verify_open() |
|
pos, siz = self._index[key] |
|
with _io.open(self._datfile, 'rb') as f: |
|
f.seek(pos) |
|
dat = f.read(siz) |
|
return dat |
|
|
|
|
|
|
|
|
|
|
|
def _addval(self, val): |
|
with _io.open(self._datfile, 'rb+') as f: |
|
f.seek(0, 2) |
|
pos = int(f.tell()) |
|
npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE |
|
f.write(b'\0'*(npos-pos)) |
|
pos = npos |
|
f.write(val) |
|
return (pos, len(val)) |
|
|
|
|
|
|
|
|
|
|
|
def _setval(self, pos, val): |
|
with _io.open(self._datfile, 'rb+') as f: |
|
f.seek(pos) |
|
f.write(val) |
|
return (pos, len(val)) |
|
|
|
|
|
|
|
|
|
def _addkey(self, key, pos_and_siz_pair): |
|
self._index[key] = pos_and_siz_pair |
|
with _io.open(self._dirfile, 'a', encoding="Latin-1") as f: |
|
self._chmod(self._dirfile) |
|
f.write("%r, %r\n" % (key.decode("Latin-1"), pos_and_siz_pair)) |
|
|
|
def __setitem__(self, key, val): |
|
if self._readonly: |
|
raise error('The database is opened for reading only') |
|
if isinstance(key, str): |
|
key = key.encode('utf-8') |
|
elif not isinstance(key, (bytes, bytearray)): |
|
raise TypeError("keys must be bytes or strings") |
|
if isinstance(val, str): |
|
val = val.encode('utf-8') |
|
elif not isinstance(val, (bytes, bytearray)): |
|
raise TypeError("values must be bytes or strings") |
|
self._verify_open() |
|
self._modified = True |
|
if key not in self._index: |
|
self._addkey(key, self._addval(val)) |
|
else: |
|
|
|
|
|
pos, siz = self._index[key] |
|
oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE |
|
newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE |
|
if newblocks <= oldblocks: |
|
self._index[key] = self._setval(pos, val) |
|
else: |
|
|
|
|
|
|
|
self._index[key] = self._addval(val) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __delitem__(self, key): |
|
if self._readonly: |
|
raise error('The database is opened for reading only') |
|
if isinstance(key, str): |
|
key = key.encode('utf-8') |
|
self._verify_open() |
|
self._modified = True |
|
|
|
del self._index[key] |
|
|
|
|
|
|
|
|
|
self._commit() |
|
|
|
def keys(self): |
|
try: |
|
return list(self._index) |
|
except TypeError: |
|
raise error('DBM object has already been closed') from None |
|
|
|
def items(self): |
|
self._verify_open() |
|
return [(key, self[key]) for key in self._index.keys()] |
|
|
|
def __contains__(self, key): |
|
if isinstance(key, str): |
|
key = key.encode('utf-8') |
|
try: |
|
return key in self._index |
|
except TypeError: |
|
if self._index is None: |
|
raise error('DBM object has already been closed') from None |
|
else: |
|
raise |
|
|
|
def iterkeys(self): |
|
try: |
|
return iter(self._index) |
|
except TypeError: |
|
raise error('DBM object has already been closed') from None |
|
__iter__ = iterkeys |
|
|
|
def __len__(self): |
|
try: |
|
return len(self._index) |
|
except TypeError: |
|
raise error('DBM object has already been closed') from None |
|
|
|
def close(self): |
|
try: |
|
self._commit() |
|
finally: |
|
self._index = self._datfile = self._dirfile = self._bakfile = None |
|
|
|
__del__ = close |
|
|
|
def _chmod(self, file): |
|
self._os.chmod(file, self._mode) |
|
|
|
def __enter__(self): |
|
return self |
|
|
|
def __exit__(self, *args): |
|
self.close() |
|
|
|
|
|
def open(file, flag='c', mode=0o666): |
|
"""Open the database file, filename, and return corresponding object. |
|
|
|
The flag argument, used to control how the database is opened in the |
|
other DBM implementations, supports only the semantics of 'c' and 'n' |
|
values. Other values will default to the semantics of 'c' value: |
|
the database will always opened for update and will be created if it |
|
does not exist. |
|
|
|
The optional mode argument is the UNIX mode of the file, used only when |
|
the database has to be created. It defaults to octal code 0o666 (and |
|
will be modified by the prevailing umask). |
|
|
|
""" |
|
|
|
|
|
try: |
|
um = _os.umask(0) |
|
_os.umask(um) |
|
except AttributeError: |
|
pass |
|
else: |
|
|
|
mode = mode & (~um) |
|
if flag not in ('r', 'w', 'c', 'n'): |
|
raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n'") |
|
return _Database(file, mode, flag=flag) |
|
|