@@ -2449,6 +2449,121 @@ def GetBinaryFileType(infile, filestart=0, closefp=True):
24492449 # -------------- FALLBACK --------------
24502450 return False
24512451
def _get_seek_consts():
    """Look up the sparse-file seek constants on the os module.

    Returns:
        (os.SEEK_DATA, os.SEEK_HOLE) when the os module defines both,
        otherwise (None, None).
    """
    found = tuple(getattr(os, name, None) for name in ("SEEK_DATA", "SEEK_HOLE"))
    # Both constants are needed together; one without the other is useless.
    if None in found:
        return None, None
    return found
2459+
def pack_sparse_to_stream(path, out_fp, bufsize=1024 * 1024):
    """
    Write ONLY the data extents of the sparse file `path` into `out_fp`.

    When the os module provides SEEK_DATA / SEEK_HOLE the extents are taken
    from os.lseek(); otherwise (or if SEEK_DATA fails with anything other
    than ENXIO) the file is scanned in 4 KiB blocks and every run of blocks
    containing at least one non-zero byte is treated as a data extent. The
    block scan cannot tell stored zero bytes from holes.

    Args:
        path: file to pack. It is opened with open(), so a symlink is
            followed; the size is taken from the opened descriptor so it
            always describes the bytes actually read.
        out_fp: writable, seekable binary file object. Extent bytes are
            written at its current position and it is rewound to offset 0
            before returning.
        bufsize: maximum number of bytes requested per read while copying
            an extent found through SEEK_DATA / SEEK_HOLE.

    Returns:
        (logical_size, extents, stored_bytes)
        logical_size: size of the file in bytes
        extents: list of (offset, length) in the logical file, in file
            order; each length is the number of bytes actually copied
        stored_bytes: total bytes written to out_fp
    """
    import errno  # local import: only needed to recognise ENXIO below

    seek_data = getattr(os, "SEEK_DATA", None)
    seek_hole = getattr(os, "SEEK_HOLE", None)
    kernel_scan = seek_data is not None and seek_hole is not None

    extents = []
    stored = 0

    with open(path, "rb", buffering=0) as f:
        fd = f.fileno()
        # fstat the descriptor we read from, so size and content always
        # refer to the same file (open() follows symlinks).
        logical_size = int(os.fstat(fd).st_size)
        pos = 0

        # Preferred path: ask the filesystem where the data extents are.
        while kernel_scan and pos < logical_size:
            try:
                data_off = os.lseek(fd, pos, seek_data)
            except OSError as err:
                if err.errno == errno.ENXIO:
                    # No data at or after pos: only a trailing hole is left.
                    pos = logical_size
                else:
                    # SEEK_DATA is not usable on this file; block-scan the
                    # remainder instead of silently dropping its data.
                    kernel_scan = False
                continue
            try:
                hole_off = os.lseek(fd, data_off, seek_hole)
            except OSError:
                hole_off = logical_size
            hole_off = min(hole_off, logical_size)

            length = hole_off - data_off
            if length <= 0:
                # Always move forward so the loop terminates.
                pos = max(pos + 1, hole_off)
                continue

            # Copy this extent's bytes into out_fp.
            os.lseek(fd, data_off, os.SEEK_SET)
            copied = 0
            while copied < length:
                chunk = f.read(min(bufsize, length - copied))
                if not chunk:
                    break  # file shrank underneath us
                out_fp.write(chunk)
                copied += len(chunk)
            if copied:
                # Record what was really stored so the extent list always
                # matches the bytes in out_fp.
                extents.append((data_off, copied))
                stored += copied
            pos = hole_off

        # Portable fallback: stream block by block, never holding more than
        # one block in memory.
        if pos < logical_size:
            block = 4096
            f.seek(pos, os.SEEK_SET)
            run_start = None
            run_len = 0
            while pos < logical_size:
                chunk = f.read(min(block, logical_size - pos))
                if not chunk:
                    break
                if any(chunk):
                    # Block holds at least one non-zero byte: data.
                    if run_start is None:
                        run_start = pos
                        run_len = 0
                    out_fp.write(chunk)
                    run_len += len(chunk)
                    stored += len(chunk)
                elif run_start is not None:
                    # An all-zero block closes the current run.
                    extents.append((run_start, run_len))
                    run_start = None
                pos += len(chunk)
            if run_start is not None:
                extents.append((run_start, run_len))

    out_fp.seek(0, os.SEEK_SET)
    return logical_size, extents, stored
2538+
def write_sparse_to_fileobj(out_fp, logical_size, extents, in_fp, bufsize=1024 * 1024):
    """
    Recreate a sparse file layout in an already-open writable file object.

    `out_fp` is truncated to `logical_size`, then for every extent the
    function seeks to the extent offset and copies `length` bytes from
    `in_fp`. Ranges not covered by an extent are never written.

    Args:
        out_fp: writable, seekable binary file object to fill.
        logical_size: size passed to out_fp.truncate().
        extents: iterable of (offset, length) pairs; the extent bytes are
            read from in_fp back to back, in this order.
        in_fp: readable binary file object positioned at the first extent's
            data.
        bufsize: maximum number of bytes requested per read.

    Raises:
        ValueError: if an extent has a negative offset or length. This is
            checked before out_fp is modified.
        EOFError: if in_fp ends before an extent has been fully read.
    """
    # Normalise and validate first. A negative length would otherwise turn
    # into in_fp.read(-n), which reads the whole remaining input.
    checked = []
    for off, length in extents:
        off = int(off)
        length = int(length)
        if off < 0 or length < 0:
            raise ValueError(
                "Invalid sparse extent (offset=%d, length=%d)" % (off, length))
        checked.append((off, length))

    out_fp.seek(0)
    out_fp.truncate(int(logical_size))

    for off, length in checked:
        out_fp.seek(off, os.SEEK_SET)
        remaining = length
        while remaining > 0:
            chunk = in_fp.read(min(bufsize, remaining))
            if not chunk:
                raise EOFError("Archive ended while reading sparse extent data")
            out_fp.write(chunk)
            remaining -= len(chunk)
2555+
def unpack_sparse_to_path(in_fp, out_path, logical_size, extents, bufsize=1024 * 1024):
    """
    Rebuild a sparse file at `out_path` from packed extent data.

    The parent directory of `out_path` is created if needed, the file is
    opened for binary writing, and the layout is restored by
    write_sparse_to_fileobj() using the extent bytes read from `in_fp`.
    """
    parent_dir = os.path.dirname(out_path)
    if not parent_dir:
        # A bare file name has no directory part; use the current directory.
        parent_dir = "."
    os.makedirs(parent_dir, exist_ok=True)

    with open(out_path, "wb") as target:
        write_sparse_to_fileobj(target, logical_size, extents, in_fp, bufsize)

        # Best-effort push to disk; a failure here is deliberately ignored.
        try:
            target.flush()
            os.fsync(target.fileno())
        except Exception:
            pass
24522567
24532568def _is_valid_zlib_header (cmf , flg ):
24542569 """
@@ -5989,7 +6104,8 @@ def AppendFilesWithContentToList(infiles, dirlistfromtxt=False, extradata=[], js
59896104 # Types that should be considered zero-length in the archive context:
59906105 zero_length_types = {1 , 2 , 3 , 4 , 5 , 6 , 8 , 9 , 10 , 11 , 13 }
59916106 # Types that have actual data to read:
5992- data_types = {0 , 7 , 12 }
6107+ data_types = {0 , 7 }
6108+ sparse_types = {12 }
59936109 if ftype in zero_length_types :
59946110 fsize = format (int ("0" ), 'x' ).lower ()
59956111 elif ftype in data_types :
@@ -6306,7 +6422,8 @@ def AppendFilesWithContentFromTarFileToList(infile, extradata=[], jsondata={}, c
63066422 # Types that should be considered zero-length in the archive context:
63076423 zero_length_types = {1 , 2 , 3 , 4 , 5 , 6 , 8 , 9 , 10 , 11 , 13 }
63086424 # Types that have actual data to read:
6309- data_types = {0 , 7 , 12 }
6425+ data_types = {0 , 7 }
6426+ sparse_types = {12 }
63106427 if ftype in zero_length_types :
63116428 fsize = format (int ("0" ), 'x' ).lower ()
63126429 elif ftype in data_types :
0 commit comments