| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| """Git LFS related utilities""" |
|
|
| import io |
| import os |
| from contextlib import AbstractContextManager |
| from typing import BinaryIO |
|
|
|
|
class SliceFileObj(AbstractContextManager):
    """
    Utility context manager to read a *slice* of a seekable file-like object as a seekable, file-like object.

    This is NOT thread safe: the slice moves the position of the wrapped `fileobj` directly.

    Inspired by stackoverflow.com/a/29838711/593036

    Credits to @julien-c

    Args:
        fileobj (`BinaryIO`):
            A file-like object to slice. MUST implement `tell()` and `seek()` (and `read()` of course).
            `fileobj` will be reset to its original position when exiting the context manager.
        seek_from (`int`):
            The start of the slice (offset from position 0 in bytes).
        read_limit (`int`):
            The maximum number of bytes to read from the slice.

    Attributes:
        fileobj (`BinaryIO`):
            The wrapped file-like object.
        seek_from (`int`):
            The start of the slice, as passed to the constructor.
        read_limit (`int`):
            The maximum slice length, as passed to the constructor.

    Examples:

    Reading 200 bytes with an offset of 128 bytes from a file (ie bytes 128 to 327):
    ```python
    >>> with open("path/to/file", "rb") as file:
    ...     with SliceFileObj(file, seek_from=128, read_limit=200) as fslice:
    ...         fslice.read(...)
    ```

    Reading a file in chunks of 512 bytes
    ```python
    >>> import os
    >>> from math import ceil
    >>> chunk_size = 512
    >>> file_size = os.path.getsize("path/to/file")
    >>> with open("path/to/file", "rb") as file:
    ...     for chunk_idx in range(ceil(file_size / chunk_size)):
    ...         with SliceFileObj(file, seek_from=chunk_idx * chunk_size, read_limit=chunk_size) as fslice:
    ...             chunk = fslice.read(...)

    ```
    """

    # Size (in bytes) of the chunks produced when iterating over the slice.
    _ITER_CHUNK_SIZE = 4 * 1024 * 1024

    def __init__(self, fileobj: BinaryIO, seek_from: int, read_limit: int):
        self.fileobj = fileobj
        self.seek_from = seek_from
        self.read_limit = read_limit

    def __enter__(self):
        """Remember the current position of `fileobj`, then move it to the start of the slice."""
        self._previous_position = self.fileobj.tell()
        end_of_stream = self.fileobj.seek(0, io.SEEK_END)
        # The slice cannot extend past the end of the stream. Clamp to 0 so that a
        # `seek_from` beyond EOF (or a negative `read_limit`) gives an empty slice
        # instead of a negative length.
        self._len = max(0, min(self.read_limit, end_of_stream - self.seek_from))
        self.fileobj.seek(self.seek_from, io.SEEK_SET)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Restore `fileobj` to the position it had when the context was entered."""
        self.fileobj.seek(self._previous_position, io.SEEK_SET)

    def read(self, n: int = -1) -> bytes:
        """
        Read up to `n` bytes from the slice, never going past its end.

        Args:
            n (`int`, defaults to `-1`):
                Maximum number of bytes to read. A negative value (or `None`) reads
                everything that remains in the slice.

        Returns:
            `bytes`: the data read, or `b""` when the end of the slice has been reached.
        """
        pos = self.tell()
        if pos >= self._len:
            return b""
        remaining_amount = self._len - pos
        if n is None or n < 0:
            n = remaining_amount
        return self.fileobj.read(min(n, remaining_amount))

    def tell(self) -> int:
        """Return the current position, relative to the start of the slice."""
        return self.fileobj.tell() - self.seek_from

    def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
        """
        Move the position inside the slice; the target is clamped to the slice bounds.

        Args:
            offset (`int`):
                Offset in bytes, interpreted according to `whence`.
            whence (`int`, defaults to `os.SEEK_SET`):
                `os.SEEK_SET` (relative to the slice start), `os.SEEK_CUR` (relative to the
                current position) or `os.SEEK_END` (relative to the slice end).

        Returns:
            `int`: the new position, relative to the start of the slice.

        Raises:
            `ValueError`: if `whence` is not one of the supported values.
        """
        start = self.seek_from
        end = start + self._len
        if whence in (os.SEEK_SET, os.SEEK_END):
            # Translate to an absolute position in the underlying file, then clamp.
            offset = start + offset if whence == os.SEEK_SET else end + offset
            offset = max(start, min(offset, end))
            whence = os.SEEK_SET
        elif whence == os.SEEK_CUR:
            # Keep the relative seek, but limit how far it may move in either direction.
            cur_pos = self.fileobj.tell()
            offset = max(start - cur_pos, min(offset, end - cur_pos))
        else:
            raise ValueError(f"whence value {whence} is not supported")
        return self.fileobj.seek(offset, whence) - self.seek_from

    def __iter__(self):
        """Yield the rest of the slice in chunks of at most `_ITER_CHUNK_SIZE` bytes."""
        # Keep reading until the slice is exhausted; yielding a single chunk would
        # silently truncate slices larger than the chunk size.
        while True:
            chunk = self.read(n=self._ITER_CHUNK_SIZE)
            if not chunk:
                break
            yield chunk
|
|