Skip to content

Commit ffc5783

Browse files
committed
Small update
1 parent 615438b commit ffc5783

1 file changed

Lines changed: 265 additions & 0 deletions

File tree

pyarchivefile/pyarchivefile.py

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3783,6 +3783,271 @@ def GetDataFromArrayAlt(structure, path, default=None):
37833783
return default
37843784
return element
37853785

3786+
class MultiOpen:
    """
    Present several files as one continuous, fixed-size byte stream.

    The files are opened once, in the order given, and their sizes are
    measured at construction time.  Reads and writes are routed to whichever
    file covers the current stream position and never extend a file: a write
    stops at the end of the last file and reports how many bytes it stored.

    Instances can be used as context managers; leaving the ``with`` block
    closes every underlying file.
    """

    def __init__(self, *paths, mode="r+b"):
        """
        Open every path with ``mode`` and record the size of each file.

        If opening or measuring any file fails, the files opened so far are
        closed again before the error is re-raised.
        """
        self.files = []
        try:
            for p in paths:
                self.files.append(open(p, mode))
            # Sizes are taken after opening, so a truncating mode yields 0.
            self.sizes = [os.path.getsize(p) for p in paths]
        except Exception:
            try:
                self.close()
            except Exception:
                # Report the original failure, not a secondary close error.
                pass
            raise
        self.total_size = sum(self.sizes)
        self.position = 0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def tell(self):
        """Return the current absolute position in the combined stream."""
        return self.position

    def seek(self, offset, whence=os.SEEK_SET):
        """
        Move the stream position and return the new absolute position.

        Raises ValueError for an unknown ``whence`` or when the target lies
        outside ``[0, total_size]``.
        """
        if whence == os.SEEK_SET:
            new_pos = offset
        elif whence == os.SEEK_CUR:
            new_pos = self.position + offset
        elif whence == os.SEEK_END:
            new_pos = self.total_size + offset
        else:
            raise ValueError("Invalid whence")

        if not (0 <= new_pos <= self.total_size):
            raise ValueError("Seek out of range")

        self.position = new_pos
        return self.position

    def _locate_file(self, position):
        """Return ``(file_index, offset_in_file)`` for an absolute position."""
        cumulative = 0
        for i, size in enumerate(self.sizes):
            if position < cumulative + size:
                return i, position - cumulative
            cumulative += size
        # position == total_size: point at the end of the last file.
        return len(self.files) - 1, self.sizes[-1]

    def read(self, size=-1):
        """
        Read up to ``size`` bytes, crossing file boundaries as needed.

        ``None`` or a negative ``size`` reads everything up to the end of
        the combined stream.  Returns ``bytes`` (empty at end of stream).
        """
        if size is None or size < 0:
            size = self.total_size - self.position

        chunks = []
        remaining = size

        while remaining > 0 and self.position < self.total_size:
            idx, offset = self._locate_file(self.position)
            f = self.files[idx]
            f.seek(offset)

            # Never read past the recorded end of the current file.
            chunk = f.read(min(remaining, self.sizes[idx] - offset))
            if not chunk:
                break

            chunks.append(chunk)
            self.position += len(chunk)
            remaining -= len(chunk)

        return b"".join(chunks)

    def write(self, data):
        """
        Overwrite bytes at the current position and return the count stored.

        Data that would run past the end of the last file is not written,
        so the return value may be smaller than ``len(data)``.
        """
        remaining = len(data)
        written = 0

        while remaining > 0 and self.position < self.total_size:
            idx, offset = self._locate_file(self.position)
            f = self.files[idx]
            f.seek(offset)

            # Stay inside the current file's recorded extent.
            to_write = min(remaining, self.sizes[idx] - offset)
            f.write(data[written:written + to_write])
            f.flush()

            self.position += to_write
            written += to_write
            remaining -= to_write

        return written

    def close(self):
        """
        Close every underlying file.

        All files are closed even if one of them fails; the first error
        encountered is re-raised afterwards.
        """
        first_error = None
        for f in self.files:
            try:
                f.close()
            except Exception as exc:
                if first_error is None:
                    first_error = exc
        if first_error is not None:
            raise first_error
3868+
3869+
class MultiFileRaw(io.RawIOBase):
    """
    Treat multiple underlying files as one continuous binary stream.
    Works best when all component files already exist and have fixed sizes.

    - Supports readinto(), read(), write(), seek(), tell()
    - Intended for binary modes: 'rb', 'r+b', 'wb', etc.

    File sizes are measured once, right after the files are opened, and all
    reads and writes stay within those extents; writing never grows a file.
    """

    def __init__(self, paths, mode="r+b"):
        """
        Open ``paths`` (a single path or an iterable of paths) with ``mode``.

        If any file cannot be opened or measured, the files opened so far
        are closed again before the error propagates.
        """
        super().__init__()
        # Set up the bookkeeping first so close() and the finalizer are safe
        # even when opening fails part-way through.
        self._closed = False
        self._files = []
        if isinstance(paths, (str, bytes, os.PathLike)):
            paths = [paths]
        self._paths = list(paths)
        self._mode = mode
        try:
            for p in self._paths:
                self._files.append(open(p, mode))
            self._sizes = [os.path.getsize(p) for p in self._paths]
        except Exception:
            for f in self._files:
                try:
                    f.close()
                except Exception:
                    # Report the original failure, not a secondary one.
                    pass
            self._files = []
            self._closed = True
            raise
        self._total = sum(self._sizes)
        self._pos = 0

    # --- Helpers ---
    def _check_open(self):
        """Raise ValueError if the stream has been closed."""
        if self._closed:
            raise ValueError("I/O operation on closed MultiFileRaw")

    def _locate(self, pos: int):
        """Return (file_index, offset_in_that_file) for absolute stream position."""
        # pos in [0, total]
        acc = 0
        for i, sz in enumerate(self._sizes):
            nxt = acc + sz
            if pos < nxt:
                return i, pos - acc
            acc = nxt
        # pos == total -> point at end of last file
        return len(self._files) - 1, self._sizes[-1]

    # --- io.RawIOBase API ---
    def readable(self):
        """Return True when the mode string allows reading."""
        return "r" in self._mode or "+" in self._mode

    def writable(self):
        """Return True when the mode string allows writing."""
        return any(ch in self._mode for ch in ("w", "a", "+"))

    def seekable(self):
        """Return True; the combined stream always supports seek()."""
        return True

    def tell(self):
        """Return the current absolute position in the combined stream."""
        self._check_open()
        return self._pos

    def seek(self, offset, whence=os.SEEK_SET):
        """
        Move the stream position and return the new absolute position.

        Raises ValueError for an unknown ``whence``, for a target outside
        ``[0, total size]``, or when the stream is closed.
        """
        self._check_open()
        if whence == os.SEEK_SET:
            new = int(offset)
        elif whence == os.SEEK_CUR:
            new = self._pos + int(offset)
        elif whence == os.SEEK_END:
            new = self._total + int(offset)
        else:
            raise ValueError("Invalid whence")

        if new < 0 or new > self._total:
            raise ValueError("Seek out of range")

        self._pos = new
        return self._pos

    def readinto(self, b):
        """
        Read bytes into a pre-allocated, writable bytes-like object b.
        Returns number of bytes read (0 at EOF).
        """
        self._check_open()
        if not self.readable():
            raise io.UnsupportedOperation("not readable")

        mv = memoryview(b).cast("B")
        if len(mv) == 0:
            return 0
        if self._pos >= self._total:
            return 0

        remaining = len(mv)
        out_off = 0

        while remaining > 0 and self._pos < self._total:
            idx, off = self._locate(self._pos)
            f = self._files[idx]
            f.seek(off, os.SEEK_SET)

            # Never read past the recorded end of the current file.
            can = min(remaining, self._sizes[idx] - off)
            n = f.readinto(mv[out_off:out_off + can])
            if not n:
                break

            self._pos += n
            out_off += n
            remaining -= n

        return out_off

    def read(self, size=-1):
        """
        Read and return up to ``size`` bytes.

        ``None`` or a negative ``size`` reads to the end of the combined
        stream.  Returns ``b""`` at end of stream.
        """
        self._check_open()
        if size is None or size < 0:
            size = self._total - self._pos
        if size == 0 or self._pos >= self._total:
            return b""

        buf = bytearray(size)
        n = self.readinto(buf)
        return bytes(buf[:n])

    def write(self, b):
        """
        Overwrite bytes at the current position; return the count written.

        Writing stops at the end of the last file, so the result may be
        smaller than ``len(b)``.
        """
        self._check_open()
        if not self.writable():
            raise io.UnsupportedOperation("not writable")

        mv = memoryview(b).cast("B")
        total_to_write = len(mv)
        if total_to_write == 0:
            return 0

        remaining = total_to_write
        in_off = 0

        # This implementation writes *within existing file extents*;
        # it never grows any of the underlying files.
        while remaining > 0 and self._pos < self._total:
            idx, off = self._locate(self._pos)
            f = self._files[idx]
            f.seek(off, os.SEEK_SET)

            can = min(remaining, self._sizes[idx] - off)
            n = f.write(mv[in_off:in_off + can])
            if n is None:
                n = can  # some file objects may return None; assume full write
            if n <= 0:
                break

            self._pos += n
            in_off += n
            remaining -= n

        return total_to_write - remaining

    def flush(self):
        """Flush every underlying file; raises ValueError when closed."""
        self._check_open()
        for f in self._files:
            f.flush()

    def close(self):
        """
        Flush and close every underlying file; safe to call repeatedly.

        Every file is closed even if flushing or closing one of them
        fails; the first error encountered is re-raised afterwards.
        """
        if self._closed:
            return

        errors = []
        try:
            # IOBase.close() calls self.flush(), which refuses to run on a
            # closed stream, so it must happen before _closed is set.
            super().close()
        except Exception as exc:
            errors.append(exc)

        self._closed = True
        for f in self._files:
            try:
                f.close()
            except Exception as exc:
                errors.append(exc)

        if errors:
            raise errors[0]
4031+
4032+
4033+
def multiopen(paths, mode="r+b", buffering=io.DEFAULT_BUFFER_SIZE):
    """
    Return a seekable file-like object over multiple files.

    ``paths`` and ``mode`` are handed to MultiFileRaw.  ``buffering``
    follows the convention of the built-in ``open()``: a positive value is
    the buffer size in bytes, ``0`` returns the unbuffered raw stream
    itself, and a negative value (or ``None``) selects
    ``io.DEFAULT_BUFFER_SIZE``.

    Examples:
    f = multiopen(["a.bin","b.bin"], "rb")
    f = multiopen(["a.bin","b.bin"], "r+b") # read/write
    """
    raw = MultiFileRaw(paths, mode=mode)

    # The io.Buffered* wrappers reject non-positive sizes, so translate the
    # open()-style special values before choosing a wrapper.
    if buffering == 0:
        return raw
    if buffering is None or buffering < 0:
        buffering = io.DEFAULT_BUFFER_SIZE

    updating = "+" in mode
    wants_read = "r" in mode
    wants_write = "w" in mode or "a" in mode

    # Choose an appropriate buffered wrapper
    if wants_read and not updating and not wants_write:
        return io.BufferedReader(raw, buffer_size=buffering)
    if wants_write and not updating and not wants_read:
        return io.BufferedWriter(raw, buffer_size=buffering)
    # default for random read/write
    return io.BufferedRandom(raw, buffer_size=buffering)
4050+
37864051
# ========= pushback-aware delimiter reader =========
37874052
class _DelimiterReader:
37884053
"""

0 commit comments

Comments
 (0)