diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index bb9ebcd4e619..b1d387ea7455 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -229,6 +229,23 @@ impl JoinLeftData { &self.batch } + /// Returns `true` if the build-side batch (`batch()`) holds at least one row. + /// + /// This is distinct from [`Self::has_matchable_build_rows`]: a build side + /// can hold rows while its hash map is empty (see that method). + pub(super) fn has_build_rows(&self) -> bool { + self.batch().num_rows() > 0 + } + + /// Returns `true` if the build-side hash map (`map()`) has any matchable entries. + /// + /// Under [`NullEquality::NullEqualsNothing`], build rows whose join key is + /// NULL are omitted from the map, so this can be `false` even when + /// [`Self::has_build_rows`] is `true`. + pub(super) fn has_matchable_build_rows(&self) -> bool { + !self.map().is_empty() + } + /// returns a reference to the build side expressions values pub(super) fn values(&self) -> &[ArrayRef] { &self.values diff --git a/datafusion/physical-plan/src/joins/hash_join/stream.rs b/datafusion/physical-plan/src/joins/hash_join/stream.rs index ed605301ad4a..2aa6e69dff80 100644 --- a/datafusion/physical-plan/src/joins/hash_join/stream.rs +++ b/datafusion/physical-plan/src/joins/hash_join/stream.rs @@ -523,12 +523,12 @@ impl HashJoinStream { join_type: JoinType, left_data: &JoinLeftData, ) -> HashJoinStreamState { - let build_empty = left_data.batch().num_rows() == 0; + let build_empty = !left_data.has_build_rows(); // The map can be empty even when the build side has rows: under // `NullEqualsNothing`, build rows with a NULL join key are omitted. For // join types whose every output row requires a build match, that still // guarantees an empty result, so we can skip scanning the probe side. 
- let map_empty = left_data.map().is_empty(); + let map_empty = !left_data.has_matchable_build_rows(); if (build_empty && join_type.empty_build_side_produces_empty_result()) || (map_empty && join_type.empty_map_produces_empty_result()) @@ -779,7 +779,7 @@ impl HashJoinStream { } } - let is_empty = build_side.left_data.map().is_empty(); + let is_empty = !build_side.left_data.has_matchable_build_rows(); if is_empty { let result = build_batch_empty_build_side(