@@ -3172,6 +3172,354 @@ def GetTotalSize(file_list):
31723172 PY_STDERR_TEXT .write ("Error accessing file {}: {}\n " .format (item , e ))
31733173 return total_size
31743174
def MajorMinorToDev(major, minor):
    """
    Pack a major/minor pair into a single device number.

    The major number is shifted into the bits above the low byte and the
    minor number is OR-ed in unchanged (it is not masked here).
    Compatible with Python 2 and 3.
    """
    shifted_major = major << 8
    return shifted_major | minor
3181+
def DevToMajorMinor(dev):
    """
    Split a device number into its (major, minor) pair.

    The minor number is the low byte of ``dev``; the major number is the
    next byte above it. Higher bits are discarded.
    Compatible with Python 2 and 3.
    """
    low_byte_mask = 0xFF
    return (dev >> 8) & low_byte_mask, dev & low_byte_mask
3190+
3191+
def GetDataFromArray(data, path, default=None):
    """
    Walk ``data`` by indexing with each key of ``path`` in turn.

    Returns the element reached, or ``default`` when any step raises
    KeyError, TypeError or IndexError (this includes a ``path`` that is
    not iterable).
    """
    current = data
    try:
        for step in path:
            current = current[step]
    except (KeyError, TypeError, IndexError):
        return default
    return current
3200+
3201+
def GetDataFromArrayAlt(structure, path, default=None):
    """
    Walk ``structure`` along ``path`` using explicit checks instead of
    exception handling.

    A step is followed only when the current node is a dict containing the
    key, or a list and the key is an int within the list's index range
    (negative indexes allowed). Any other step returns ``default``.
    """
    node = structure
    for step in path:
        if isinstance(node, dict) and step in node:
            node = node[step]
            continue
        index_is_valid = (
            isinstance(node, list)
            and isinstance(step, int)
            and -len(node) <= step < len(node)
        )
        if not index_is_valid:
            return default
        node = node[step]
    return node
3212+
3213+ # ========= pushback-aware delimiter reader =========
class _DelimiterReader:
    """
    Chunked reader that consumes up to N occurrences of a byte delimiter.

    - Bytes read past the last consumed delimiter are handed back once the
      public call finishes: by seeking backwards on seekable streams, or by
      stashing them on ``fp._read_until_delim_pushback`` otherwise.
    - A stash left on ``fp`` by an earlier reader is picked up again before
      any new data is read, so consecutive readers on one stream see the
      bytes in order.
    - A delimiter longer than one byte is found even when it is split
      across two ``fp.read()`` chunks.
    """
    _PB_ATTR = "_read_until_delim_pushback"

    def __init__(self, fp, delimiter, chunk_size=8192, max_read=64 * 1024 * 1024):
        """
        fp         -- object with a ``read(n)`` method returning bytes-like data
        delimiter  -- text or bytes-like; None means a single NUL byte
        chunk_size -- size passed to each ``fp.read()`` call
        max_read   -- upper bound on bytes read from ``fp`` by this reader

        Raises ValueError if ``fp`` has no ``read`` or the delimiter is empty.
        """
        if not hasattr(fp, "read"):
            raise ValueError("fp must be a readable file-like object")

        # normalize delimiter -> bytes
        if delimiter is None:
            delimiter = b"\0"
        # Text has .encode() and is not bytes (str on Python 3, unicode on
        # Python 2); everything else is treated as bytes-like.
        if hasattr(delimiter, "encode") and not isinstance(delimiter, bytes):
            delimiter_b = delimiter.encode("utf-8")
        else:
            delimiter_b = bytes(delimiter)
        if not delimiter_b:
            raise ValueError("delimiter must not be empty")

        self.fp = fp
        self.delim = delimiter_b
        self.dlen = len(delimiter_b)
        self.chunk = int(chunk_size)
        self.max_read = int(max_read)

        self._buf = bytearray()
        self._total = 0

        # detect seekability (best-effort)
        seekable = getattr(fp, "seekable", None)
        if callable(seekable):
            self._seekable = bool(seekable())
        else:
            self._seekable = hasattr(fp, "seek") and hasattr(fp, "tell")
        # Rewinding is only valid while every buffered byte came from the
        # stream itself; _reclaim_stash() switches it off otherwise.
        self._rewind_ok = self._seekable

        # Preload any pushback from previous reads on this fp
        self._reclaim_stash()

    def _reclaim_stash(self):
        """Move bytes stashed on the fp (if any) into the local buffer."""
        pb = getattr(self.fp, self._PB_ATTR, None)
        if pb:
            self._buf.extend(pb)
            setattr(self.fp, self._PB_ATTR, bytearray())  # consume
            # Stashed bytes are no longer "behind" the stream position, so a
            # relative seek cannot return them; keep stashing from now on.
            self._rewind_ok = False

    def _read_more(self):
        """
        Append one chunk from fp to the buffer.

        Returns False at EOF. Raises TypeError if fp.read() returns
        something that is not bytes-like, ValueError once more than
        ``max_read`` bytes have been read.
        """
        data = self.fp.read(self.chunk)
        if not data:
            return False
        if not isinstance(data, (bytes, bytearray, memoryview)):
            raise TypeError("fp.read() must return bytes-like")
        if isinstance(data, memoryview):
            data = data.tobytes()
        self._buf.extend(data)
        self._total += len(data)
        if self._total > self.max_read:
            raise ValueError("Maximum read limit reached without finding the delimiter")
        return True

    def _pushback(self, over_bytes):
        """Return extra bytes to the stream (seek back) or stash on the fp."""
        if not over_bytes:
            return
        if self._rewind_ok:
            try:
                self.fp.seek(-len(over_bytes), io.SEEK_CUR)
                return
            except Exception:
                # Deliberate best-effort: a stream that claims to be
                # seekable but refuses gets the stash treatment below.
                self._rewind_ok = False
        # Non-seekable: stash for next call on this fp
        pb = getattr(self.fp, self._PB_ATTR, None)
        if pb is None:
            setattr(self.fp, self._PB_ATTR, bytearray(over_bytes))
        else:
            pb.extend(over_bytes)

    def _take_piece(self):
        """
        Cut the next piece off the buffer, reading more data as needed.

        Leftover bytes stay in the buffer; nothing is pushed back here.
        Returns (piece_bytes, found_delimiter_bool).
        """
        self._reclaim_stash()
        search_from = 0
        while True:
            idx = self._buf.find(self.delim, search_from)
            if idx != -1:
                piece = bytes(self._buf[:idx])
                del self._buf[:idx + self.dlen]
                return piece, True

            # Resume the next scan early enough to catch a delimiter whose
            # first bytes are already at the end of the buffer.
            search_from = max(0, len(self._buf) - self.dlen + 1)

            if not self._read_more():
                # EOF: return whatever we have (possibly empty), no delimiter
                piece = bytes(self._buf)
                del self._buf[:]
                return piece, False

    def _release_leftover(self):
        """Hand every still-buffered byte back to the stream or the stash."""
        leftover = bytes(self._buf)
        del self._buf[:]
        self._pushback(leftover)

    def read_one_piece(self):
        """
        Read bytes up to (but not including) the next delimiter.
        Returns (piece_bytes, found_delimiter_bool).
        """
        piece, found = self._take_piece()
        self._release_leftover()
        return piece, found

    def read_n_pieces(self, n, pad_to_n=False):
        """
        Read up to n pieces (n delimiters). Returns list of bytes; len <= n.
        If pad_to_n=True, pads with b"" until length == n (avoids downstream IndexError).
        """
        n = int(n)
        parts = []
        while len(parts) < n:
            piece, found = self._take_piece()
            if not found and piece == b"":
                break  # true EOF with nothing more
            parts.append(piece)
            if not found:
                break  # EOF after a final unterminated piece
        # Over-read bytes are returned once, after the last piece, so pieces
        # in between are cut from the buffer without touching the stream.
        self._release_leftover()
        if pad_to_n and len(parts) < n:
            parts.extend([b""] * (n - len(parts)))
        return parts
3330+
3331+
3332+ # ========= helpers =========
def _default_delim(delimiter):
    """
    Resolve the delimiter to use.

    An explicit (non-None) ``delimiter`` is returned unchanged. Otherwise
    the module-level ``__file_format_dict__["format_delimiter"]`` is used
    when it can be looked up, and a single NUL character is the final
    fallback.
    """
    if delimiter is not None:
        return delimiter
    try:
        configured = __file_format_dict__["format_delimiter"]
    except (NameError, LookupError, TypeError):
        # The global spec is missing, has no such key, or is not
        # subscriptable: fall through to the NUL default.
        configured = None
    return configured if configured is not None else "\0"
3341+
3342+
def _decode_text(b, errors):
    """Decode ``b`` as UTF-8 using the given ``errors`` policy and return the text."""
    text = b.decode("utf-8", errors=errors)
    return text
3345+
3346+
def _read_exact(fp, n):
    """
    Read exactly ``n`` bytes from ``fp`` and return them as bytes.

    ``fp.read()`` is called repeatedly with the number of bytes still
    missing. Raises EOFError if it returns nothing before ``n`` bytes have
    been collected.
    """
    want = int(n)
    collected = bytearray()
    remaining = want
    while remaining > 0:
        chunk = fp.read(remaining)
        if not chunk:
            raise EOFError("Unexpected EOF: wanted {} more bytes".format(remaining))
        if isinstance(chunk, memoryview):
            chunk = chunk.tobytes()
        collected.extend(chunk)
        remaining = want - len(collected)
    return bytes(collected)
3359+
3360+
def _expect_delimiter(fp, delimiter):
    """
    Consume the delimiter from ``fp`` and verify it (no seeking).

    The delimiter is resolved through ``_default_delim``; text is encoded
    as UTF-8. Exactly that many bytes are read with ``_read_exact``, and a
    ValueError is raised when they differ from the expected delimiter.
    """
    resolved = _default_delim(delimiter)
    expected = resolved.encode("utf-8") if isinstance(resolved, str) else bytes(resolved)
    actual = _read_exact(fp, len(expected))
    if actual == expected:
        return
    raise ValueError("Delimiter mismatch: expected {!r}, got {!r}".format(expected, actual))
3371+
3372+
3373+ # ========= unified public API (bytes/text control) =========
def read_until_delimiter(
    fp,
    delimiter=b"\0",
    max_read=None,
    chunk_size=None,
    decode=True,
    errors=None,
):
    """
    Read until the first occurrence of 'delimiter'. Strips the delimiter.

    - delimiter: text or bytes; defaults to a single NUL byte. None is
      resolved through _default_delim().
    - max_read: read limit in bytes (None -> 64 MiB).
    - chunk_size: size of each underlying read (None -> 8192).
    - decode: return text (UTF-8) when True; bytes when False.
    - errors: decode error policy (None -> "strict").
    - Non-seekable streams are supported via pushback on the file object.

    If EOF is reached before a delimiter, the bytes read so far are
    returned; the caller is not told whether a delimiter was seen.
    """
    if max_read is None:
        max_read = 64 * 1024 * 1024
    if chunk_size is None:
        chunk_size = 8192
    if errors is None:
        errors = "strict"

    r = _DelimiterReader(
        fp,
        delimiter=_default_delim(delimiter),
        chunk_size=chunk_size,
        max_read=max_read,
    )
    piece = r.read_one_piece()[0]
    return _decode_text(piece, errors) if decode else piece
3402+
3403+
def read_until_n_delimiters(
    fp,
    delimiter=b"\0",
    num_delimiters=1,
    max_read=None,
    chunk_size=None,
    decode=True,
    errors=None,
    pad_to_n=False,
):
    """
    Read up to 'num_delimiters' occurrences. Returns list of pieces (len <= N).

    - delimiter: text or bytes; defaults to a single NUL byte. None is
      resolved through _default_delim().
    - max_read: read limit in bytes (None -> 64 MiB).
    - chunk_size: size of each underlying read (None -> 8192).
    - decode: pieces are text (UTF-8) when True; bytes when False.
    - errors: decode error policy (None -> "strict").
    - pad_to_n: pad with empty pieces to length N (useful for rigid parsers).
    """
    if max_read is None:
        max_read = 64 * 1024 * 1024
    if chunk_size is None:
        chunk_size = 8192
    if errors is None:
        errors = "strict"

    r = _DelimiterReader(
        fp,
        delimiter=_default_delim(delimiter),
        chunk_size=chunk_size,
        max_read=max_read,
    )
    parts = r.read_n_pieces(num_delimiters, pad_to_n=pad_to_n)
    if decode:
        return [_decode_text(p, errors) for p in parts]
    return parts
3435+
3436+
3437+ # ========= back-compat wrappers (your original names) =========
def ReadTillNullByteOld(fp, delimiter=_default_delim(None)):
    """
    Back-compat wrapper around read_until_delimiter().

    Reads one byte per underlying read call, with a 64 MiB limit, and
    returns text decoded with errors="replace".
    NOTE: the default delimiter is computed once, when this function is
    defined.
    """
    # chunk_size=1 emulates the byte-by-byte behaviour of the old reader.
    legacy_options = dict(
        max_read=64 * 1024 * 1024,
        chunk_size=1,
        decode=True,
        errors="replace",
    )
    return read_until_delimiter(fp, delimiter, **legacy_options)
3448+
3449+
def ReadUntilNullByteOld(fp, delimiter=_default_delim(None)):
    """Alias of ReadTillNullByteOld(); forwards both arguments unchanged."""
    result = ReadTillNullByteOld(fp, delimiter)
    return result
3452+
3453+
def ReadTillNullByteAlt(fp, delimiter=_default_delim(None), chunk_size=1024, max_read=64 * 1024 * 1024):
    """
    Back-compat wrapper around read_until_delimiter().

    Uses the caller's chunk size and read limit, and returns text decoded
    with errors="replace".
    NOTE: the default delimiter is computed once, when this function is
    defined.
    """
    options = dict(
        max_read=max_read,
        chunk_size=chunk_size,
        decode=True,
        errors="replace",
    )
    return read_until_delimiter(fp, delimiter, **options)
3463+
3464+
def ReadUntilNullByteAlt(fp, delimiter=_default_delim(None), chunk_size=1024, max_read=64 * 1024 * 1024):
    """Alias of ReadTillNullByteAlt(); forwards all arguments positionally."""
    result = ReadTillNullByteAlt(fp, delimiter, chunk_size, max_read)
    return result
3467+
3468+
def ReadTillNullByte(fp, delimiter=_default_delim(None), max_read=64 * 1024 * 1024):
    """
    Back-compat wrapper around read_until_delimiter().

    Reads in 8192-byte chunks and returns text decoded with
    errors="strict", so invalid UTF-8 raises instead of being replaced.
    NOTE: the default delimiter is computed once, when this function is
    defined.
    """
    options = dict(
        max_read=max_read,
        chunk_size=8192,
        decode=True,
        errors="strict",
    )
    return read_until_delimiter(fp, delimiter, **options)
3478+
3479+
def ReadUntilNullByte(fp, delimiter=_default_delim(None), max_read=64 * 1024 * 1024):
    """Alias of ReadTillNullByte(); forwards all arguments positionally."""
    result = ReadTillNullByte(fp, delimiter, max_read)
    return result
3482+
3483+
def ReadTillNullByteByNum(
    fp,
    delimiter=_default_delim(None),
    num_delimiters=1,
    chunk_size=1024,
    max_read=64 * 1024 * 1024,
):
    """
    Back-compat wrapper around read_until_n_delimiters().

    Returns a list of text parts decoded with errors="replace". The list
    is padded to ``num_delimiters`` entries so rigid parsers indexing into
    it do not hit IndexError.
    NOTE: the default delimiter is computed once, when this function is
    defined.
    """
    options = dict(
        max_read=max_read,
        chunk_size=chunk_size,
        decode=True,
        errors="replace",
        pad_to_n=True,
    )
    return read_until_n_delimiters(fp, delimiter, num_delimiters, **options)
3502+
3503+
def ReadUntilNullByteByNum(
    fp,
    delimiter=_default_delim(None),
    num_delimiters=1,
    chunk_size=1024,
    max_read=64 * 1024 * 1024,
):
    """Alias of ReadTillNullByteByNum(); forwards all arguments positionally."""
    result = ReadTillNullByteByNum(fp, delimiter, num_delimiters, chunk_size, max_read)
    return result
3512+
3513+
def SeekToEndOfFile(fp):
    """
    Position ``fp`` at its end and return True.

    A single seek relative to the end of the stream is attempted first.
    Stepping forward one byte at a time and waiting for tell() to stop
    changing does not terminate on streams that allow seeking past EOF
    (regular files, io.BytesIO), because their position keeps growing.

    If the stream rejects the end-relative seek, the remaining data is
    read and discarded instead, which also leaves the stream at EOF.
    """
    try:
        fp.seek(0, 2)  # whence 2 == SEEK_END
    except (AttributeError, TypeError, ValueError, IOError, OSError):
        # No usable end-relative seek: drain the stream instead.
        while fp.read(65536):
            pass
    return True
3522+
31753523def ReadFileHeaderData (fp , skipchecksum = False , formatspecs = None , saltkey = None ):
31763524 if (formatspecs is None ):
31773525 formatspecs = __file_format_multi_dict__
0 commit comments