@@ -493,6 +493,17 @@ def _gpu_compress_to_part(gpu_arr, w, h, spp):
493493 # issue #1948.
494494 current = arr
495495 cumulative_factor = 1
496+ # ``make_overview_gpu`` preserves dtype, so the sentinel cast is
497+ # loop-invariant. Hoist it (and the float/finite gate) out of the
498+ # inner ``while`` to skip redundant per-level scalar work.
499+ rewrite_nodata = (
500+ nodata is not None
501+ and np_dtype .kind == 'f'
502+ and not np .isnan (float (nodata ))
503+ )
504+ sentinel_scalar = (
505+ np_dtype .type (nodata ) if rewrite_nodata else None
506+ )
496507 for target_factor in overview_levels :
497508 # Halve repeatedly until the cumulative decimation matches
498509 # the requested factor. Validation has already established
@@ -502,14 +513,10 @@ def _gpu_compress_to_part(gpu_arr, w, h, spp):
502513 current = make_overview_gpu (current , method = overview_resampling ,
503514 nodata = nodata )
504515 cumulative_factor *= 2
505- if (nodata is not None
506- and np .dtype (str (current .dtype )).kind == 'f'
507- and not np .isnan (float (nodata ))):
516+ if rewrite_nodata :
508517 nan_mask = cupy .isnan (current )
509518 if bool (nan_mask .any ().item ()):
510- cupy .putmask (
511- current , nan_mask ,
512- np .dtype (str (current .dtype )).type (nodata ))
519+ cupy .putmask (current , nan_mask , sentinel_scalar )
513520 oh , ow = current .shape [:2 ]
514521 parts .append (_gpu_compress_to_part (current , ow , oh , samples ))
515522
0 commit comments