Skip to content

Commit 5b7e5c4

Browse files
committed
Small update
1 parent 8588e64 commit 5b7e5c4

1 file changed

Lines changed: 119 additions & 2 deletions

File tree

pyneofile/pyneofile.py

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2449,6 +2449,121 @@ def GetBinaryFileType(infile, filestart=0, closefp=True):
24492449
# -------------- FALLBACK --------------
24502450
return False
24512451

2452+
def _get_seek_consts():
    """Return (SEEK_DATA, SEEK_HOLE) if supported, else (None, None)."""
    seek_data = getattr(os, "SEEK_DATA", None)
    seek_hole = getattr(os, "SEEK_HOLE", None)
    if seek_data is None or seek_hole is None:
        return None, None
    return seek_data, seek_hole


def _copy_range(f, out_fp, length, bufsize):
    """Copy up to `length` bytes from f's current position into out_fp; return bytes copied."""
    copied = 0
    while copied < length:
        chunk = f.read(min(bufsize, length - copied))
        if not chunk:
            # Source ended early (e.g. truncated while we were reading).
            break
        out_fp.write(chunk)
        copied += len(chunk)
    return copied


def _pack_extents_by_seek(f, size, out_fp, bufsize, seek_data, seek_hole):
    """Copy data extents located with lseek(SEEK_DATA/SEEK_HOLE); return (extents, stored)."""
    import errno

    fd = f.fileno()
    extents = []
    stored = 0
    pos = 0
    while pos < size:
        try:
            data_off = os.lseek(fd, pos, seek_data)
        except OSError as err:
            if err.errno == errno.ENXIO:
                break  # nothing but a trailing hole is left
            # The layout query itself failed: treat the remainder as data
            # instead of silently dropping it from the archive.
            data_off = pos
            hole_off = size
        else:
            try:
                hole_off = os.lseek(fd, data_off, seek_hole)
            except OSError:
                hole_off = size
        hole_off = min(hole_off, size)

        length = hole_off - data_off
        if length <= 0:
            # Always make forward progress, even on a degenerate answer.
            pos = max(pos + 1, hole_off)
            continue

        os.lseek(fd, data_off, os.SEEK_SET)
        copied = _copy_range(f, out_fp, length, bufsize)
        if copied:
            # Record what was actually written so the extent table always
            # matches the payload byte-for-byte.
            extents.append((data_off, copied))
            stored += copied
        if copied < length:
            break  # file shrank underneath us; nothing more to read
        pos = hole_off
    return extents, stored


def _pack_extents_by_zero_scan(f, size, out_fp):
    """Copy runs of blocks containing any non-zero byte; return (extents, stored)."""
    block = 4096
    extents = []
    stored = 0
    pos = 0
    run_start = None  # logical offset where the current data run began
    while pos < size:
        chunk = f.read(min(block, size - pos))
        if not chunk:
            break
        if any(chunk):  # bytes iterate as ints: true if any byte is non-zero
            if run_start is None:
                run_start = pos
            # Stream each block straight out instead of buffering the run.
            out_fp.write(chunk)
            stored += len(chunk)
        elif run_start is not None:
            extents.append((run_start, pos - run_start))
            run_start = None
        pos += len(chunk)
    if run_start is not None:
        extents.append((run_start, pos - run_start))
    return extents, stored


def pack_sparse_to_stream(path, out_fp, bufsize=1024*1024):
    """
    Write ONLY data extents from sparse file `path` into `out_fp`.

    The extents are written back to back, in ascending offset order.
    When the platform exposes SEEK_DATA/SEEK_HOLE the kernel is asked where
    the data lives; otherwise the file is scanned in 4096-byte blocks and
    all-zero blocks are treated as holes (real zeros and holes cannot be
    told apart in that mode).

    path: file to read; symlinks are followed, and the size is taken from
        the opened file so it always matches the bytes being read.
    out_fp: writable, seekable binary stream; it is rewound to offset 0
        before returning.
    bufsize: maximum chunk size for copying (values below 1 are treated as 1).

    Returns: (logical_size, extents, stored_bytes)
      extents: list of (offset, length) in logical file
      stored_bytes: total bytes written to out_fp (equals the sum of the
        extent lengths)
    """
    bufsize = max(int(bufsize), 1)
    seek_data, seek_hole = _get_seek_consts()

    with open(path, "rb", buffering=0) as f:
        # fstat on the open descriptor: same file we read, no symlink mismatch.
        logical_size = int(os.fstat(f.fileno()).st_size)
        if seek_data is not None and seek_hole is not None:
            # Kernel knows where holes are (best, fastest, exact).
            extents, stored = _pack_extents_by_seek(
                f, logical_size, out_fp, bufsize, seek_data, seek_hole)
        else:
            # Portable fallback (no SEEK_HOLE/DATA): scan for non-zero blocks.
            extents, stored = _pack_extents_by_zero_scan(f, logical_size, out_fp)

    out_fp.seek(0, os.SEEK_SET)
    return logical_size, extents, stored
2538+
2539+
def write_sparse_to_fileobj(out_fp, logical_size, extents, in_fp, bufsize=1024*1024):
    """
    Recreate sparse file layout into an already-open writable file-like object.

    out_fp: writable, seekable binary stream; it is resized to `logical_size`
        and each extent is written at its logical offset.
    logical_size: final size of the reconstructed file, in bytes.
    extents: iterable of (offset, length) pairs; their payloads are read
        sequentially from `in_fp` in the order given.
    in_fp: readable binary stream positioned at the first extent's payload.
    bufsize: maximum chunk size for copying (values below 1 are treated as 1).

    Extents with a non-positive length consume no input.
    On return out_fp is positioned just past the last byte written for the
    final extent (or at 0 if there were no extents).

    Raises EOFError if `in_fp` runs out before an extent is complete.
    """
    size = int(logical_size)
    bufsize = max(int(bufsize), 1)

    out_fp.seek(0)
    out_fp.truncate(size)

    for off, length in extents:
        out_fp.seek(int(off), os.SEEK_SET)
        remaining = int(length)
        # "> 0" rather than truthiness: a negative length must never reach
        # read(), where a negative count means "read everything".
        while remaining > 0:
            chunk = in_fp.read(min(bufsize, remaining))
            if not chunk:
                raise EOFError("Archive ended while reading sparse extent data")
            out_fp.write(chunk)
            remaining -= len(chunk)

    # truncate() cannot grow every stream type (io.BytesIO, for one, only
    # shrinks), which would lose a trailing hole. If the stream is still
    # short, materialise the final byte; real files already extended by
    # truncate() are left untouched and stay sparse.
    resume = out_fp.tell()
    out_fp.seek(0, os.SEEK_END)
    if out_fp.tell() < size:
        out_fp.seek(size - 1, os.SEEK_SET)
        out_fp.write(b"\0")
    out_fp.seek(resume, os.SEEK_SET)
2555+
2556+
def unpack_sparse_to_path(in_fp, out_path, logical_size, extents, bufsize=1024*1024):
    """
    Recreate a sparse file at `out_path` from packed extent data.

    in_fp: readable binary stream positioned at the first extent's payload.
    out_path: destination file; missing parent directories are created and
        an existing file is overwritten.
    logical_size: final size of the file, in bytes.
    extents: iterable of (offset, length) pairs describing where each
        payload belongs in the logical file.
    bufsize: maximum chunk size for copying.
    """
    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)

    with open(out_path, "wb") as f:
        write_sparse_to_fileobj(f, logical_size, extents, in_fp, bufsize)

        # Pushing the data to stable storage is best-effort: an OS-level
        # failure here must not abort extraction. Only OSError is ignored,
        # so programming errors are no longer hidden.
        try:
            f.flush()
            os.fsync(f.fileno())
        except OSError:
            pass
24522567

24532568
def _is_valid_zlib_header(cmf, flg):
24542569
"""
@@ -5989,7 +6104,8 @@ def AppendFilesWithContentToList(infiles, dirlistfromtxt=False, extradata=[], js
59896104
# Types that should be considered zero-length in the archive context:
59906105
zero_length_types = {1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13}
59916106
# Types that have actual data to read:
5992-
data_types = {0, 7, 12}
6107+
data_types = {0, 7}
6108+
sparse_types = {12}
59936109
if ftype in zero_length_types:
59946110
fsize = format(int("0"), 'x').lower()
59956111
elif ftype in data_types:
@@ -6306,7 +6422,8 @@ def AppendFilesWithContentFromTarFileToList(infile, extradata=[], jsondata={}, c
63066422
# Types that should be considered zero-length in the archive context:
63076423
zero_length_types = {1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 13}
63086424
# Types that have actual data to read:
6309-
data_types = {0, 7, 12}
6425+
data_types = {0, 7}
6426+
sparse_types = {12}
63106427
if ftype in zero_length_types:
63116428
fsize = format(int("0"), 'x').lower()
63126429
elif ftype in data_types:

0 commit comments

Comments
 (0)