From 56485082a0361d94782e34f0860cef8710f1c03f Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 22 May 2026 00:23:59 -0700 Subject: [PATCH] [spr] initial version Created using spr 1.3.6-beta.1 --- crates/iddqd/src/bi_hash_map/imp.rs | 36 ++-- crates/iddqd/src/id_hash_map/imp.rs | 31 +-- crates/iddqd/src/support/hash_table.rs | 19 -- crates/iddqd/src/tri_hash_map/imp.rs | 47 +++-- .../iddqd/tests/integration/pathological.rs | 196 +++++++++++++++++- 5 files changed, 253 insertions(+), 76 deletions(-) diff --git a/crates/iddqd/src/bi_hash_map/imp.rs b/crates/iddqd/src/bi_hash_map/imp.rs index 54c5f2c..f40a1f1 100644 --- a/crates/iddqd/src/bi_hash_map/imp.rs +++ b/crates/iddqd/src/bi_hash_map/imp.rs @@ -2189,32 +2189,32 @@ impl BiHashMap { &mut self, remove_index: ItemIndex, ) -> Option { - // For panic safety, look up both table entries while `self.items` still - // holds the value, then remove from both tables and items in sequence. - // hashbrown's `find_entry` is panic-safe under user-`Hash`/`Eq` panics - // (the table is not mutated until `OccupiedEntry::remove` is called), - // so a panic during the lookups leaves items and both tables - // unmodified. + // For panic safety, compute both key hashes and look up both table + // entries while `self.items` still holds the value, then remove from + // both tables and items in sequence. These lookups deliberately match + // by `ItemIndex` rather than by user `Eq`: at this point we already + // know which item is being removed, and user `Eq` might be + // pathological. hashbrown's `find_entry_by_hash` is panic-safe because + // the table is not mutated until `OccupiedEntry::remove` is called, so + // a panic while hashing leaves items and both tables unmodified. let item = self.items.get(remove_index)?; - let key1 = item.key1(); - let key2 = item.key2(); let state = &self.tables.state; - let Ok(entry1) = - self.tables - .k1_to_item - .find_entry(state, &key1, |index| self.items[index].key1()) + let hash1 = state.hash_one(item.key1()); + let hash2 = state.hash_one(item.key2()); + let Ok(entry1) = self + .tables + .k1_to_item + .find_entry_by_hash(hash1, |index| index == remove_index) else { panic!("remove_index {remove_index} not found in k1_to_item"); }; - let Ok(entry2) = - self.tables - .k2_to_item - .find_entry(state, &key2, |index| self.items[index].key2()) + let Ok(entry2) = self + .tables + .k2_to_item + .find_entry_by_hash(hash2, |index| index == remove_index) else { panic!("remove_index {remove_index} not found in k2_to_item"); }; - // Drop the keys so that `self.items` can be mutated below. - drop((key1, key2)); entry1.remove(); entry2.remove(); Some( diff --git a/crates/iddqd/src/id_hash_map/imp.rs b/crates/iddqd/src/id_hash_map/imp.rs index 57a2e87..29317c2 100644 --- a/crates/iddqd/src/id_hash_map/imp.rs +++ b/crates/iddqd/src/id_hash_map/imp.rs @@ -1426,24 +1426,25 @@ impl IdHashMap { &mut self, remove_index: ItemIndex, ) -> Option { - // For panic safety, look up the table entry while `self.items` still - // holds the value, then remove from the table and items in sequence. - // hashbrown's `find_entry` is panic-safe under user-`Hash`/`Eq` panics - // (the table is not mutated until `OccupiedEntry::remove` is called), - // so a panic during the lookup leaves both items and the table - // unmodified. - let key = self.items.get(remove_index)?.key(); + // For panic safety, compute the key hash and look up the table entry + // while `self.items` still holds the value, then remove from the table + // and items in sequence. This lookup deliberately matches by + // `ItemIndex` rather than by user `Eq`: at this point we already know + // which item is being removed, and user `Eq` might be pathological. + // hashbrown's `find_entry_by_hash` is panic-safe because the table is + // not mutated until `OccupiedEntry::remove` is called, so a panic while + // hashing leaves both items and the table unmodified. + let item = self.items.get(remove_index)?; let state = &self.tables.state; - let Ok(item) = - self.tables - .key_to_item - .find_entry(state, &key, |index| self.items[index].key()) + let hash = state.hash_one(item.key()); + let Ok(entry) = self + .tables + .key_to_item + .find_entry_by_hash(hash, |index| index == remove_index) else { - panic!("we just looked this item up"); + panic!("remove_index {remove_index} not found in key_to_item"); }; - // Drop the key so that `self.items` can be mutated below. - drop(key); - item.remove(); + entry.remove(); Some( self.items .remove(remove_index) diff --git a/crates/iddqd/src/support/hash_table.rs b/crates/iddqd/src/support/hash_table.rs index ee2822b..90245b6 100644 --- a/crates/iddqd/src/support/hash_table.rs +++ b/crates/iddqd/src/support/hash_table.rs @@ -9,7 +9,6 @@ use super::{ use crate::internal::{TableValidationError, ValidateCompact}; use alloc::{collections::BTreeSet, vec::Vec}; use core::{ - borrow::Borrow, fmt, hash::{BuildHasher, Hash}, }; @@ -129,24 +128,6 @@ impl MapHashTable { ) } - pub(crate) fn find_entry( - &mut self, - state: &S, - key: &Q, - lookup: F, - ) -> Result< - OccupiedEntry<'_, ItemIndex, AllocWrapper>, - AbsentEntry<'_, ItemIndex, AllocWrapper>, - > - where - F: Fn(ItemIndex) -> K, - K: Hash + Eq + Borrow, - Q: ?Sized + Hash + Eq, - { - let hash = state.hash_one(key); - self.items.find_entry(hash, |index| lookup(*index).borrow() == key) - } - pub(crate) fn find_entry_by_hash( &mut self, hash: u64, diff --git a/crates/iddqd/src/tri_hash_map/imp.rs b/crates/iddqd/src/tri_hash_map/imp.rs index 517aea4..e6eb09b 100644 --- a/crates/iddqd/src/tri_hash_map/imp.rs +++ b/crates/iddqd/src/tri_hash_map/imp.rs @@ -2730,40 +2730,41 @@ impl TriHashMap { &mut self, remove_index: ItemIndex, ) -> Option { - // For panic safety, look up all three table entries while `self.items` - // still holds the value, then remove from all three tables and items - // in sequence. hashbrown's `find_entry` is panic-safe under - // user-`Hash`/`Eq` panics (the table is not mutated until - // `OccupiedEntry::remove` is called), so a panic during the lookups - // leaves items and all three tables unmodified. + // For panic safety, compute all three key hashes and look up all three + // table entries while `self.items` still holds the value, then remove + // from all three tables and items in sequence. These lookups + // deliberately match by `ItemIndex` rather than by user `Eq`: at this + // point we already know which item is being removed, and user `Eq` + // might be pathological. hashbrown's `find_entry_by_hash` is + // panic-safe because the table is not mutated until + // `OccupiedEntry::remove` is called, so a panic while hashing leaves + // items and all three tables unmodified. let item = self.items.get(remove_index)?; - let key1 = item.key1(); - let key2 = item.key2(); - let key3 = item.key3(); let state = &self.tables.state; - let Ok(entry1) = - self.tables - .k1_to_item - .find_entry(state, &key1, |index| self.items[index].key1()) + let hash1 = state.hash_one(item.key1()); + let hash2 = state.hash_one(item.key2()); + let hash3 = state.hash_one(item.key3()); + let Ok(entry1) = self + .tables + .k1_to_item + .find_entry_by_hash(hash1, |index| index == remove_index) else { panic!("remove_index {remove_index} not found in k1_to_item"); }; - let Ok(entry2) = - self.tables - .k2_to_item - .find_entry(state, &key2, |index| self.items[index].key2()) + let Ok(entry2) = self + .tables + .k2_to_item + .find_entry_by_hash(hash2, |index| index == remove_index) else { panic!("remove_index {remove_index} not found in k2_to_item"); }; - let Ok(entry3) = - self.tables - .k3_to_item - .find_entry(state, &key3, |index| self.items[index].key3()) + let Ok(entry3) = self + .tables + .k3_to_item + .find_entry_by_hash(hash3, |index| index == remove_index) else { panic!("remove_index {remove_index} not found in k3_to_item"); }; - // Drop the keys so that `self.items` can be mutated below. - drop((key1, key2, key3)); entry1.remove(); entry2.remove(); entry3.remove(); diff --git a/crates/iddqd/tests/integration/pathological.rs b/crates/iddqd/tests/integration/pathological.rs index e20292e..61c69b4 100644 --- a/crates/iddqd/tests/integration/pathological.rs +++ b/crates/iddqd/tests/integration/pathological.rs @@ -7,8 +7,10 @@ use crate::panic_safety::{PanickyKey, arm_panic_after, disarm_panic}; use core::cell::Cell; use iddqd::{ - IdHashItem, IdHashMap, IdOrdItem, IdOrdMap, id_ord_map, id_upcast, + BiHashItem, BiHashMap, Equivalent, IdHashItem, IdHashMap, IdOrdItem, + IdOrdMap, TriHashItem, TriHashMap, bi_upcast, id_ord_map, id_upcast, internal::{ValidateChaos, ValidateCompact}, + tri_upcast, }; use iddqd_test_utils::unwind::catch_panic; use std::{ @@ -271,6 +273,198 @@ fn lying_eq_no_ub() { assert_eq!(count, len); } +// Test: pathological Eq on hash keys must not let table-driven mutable paths +// yield overlapping indexes after remove/reinsert. + +thread_local! { + static MISDIRECTED_EQ_MODE: Cell = const { Cell::new(false) }; +} + +struct MisdirectedEqKey { + id: u32, +} + +impl Hash for MisdirectedEqKey { + fn hash(&self, state: &mut H) { + if self.id <= 1 { + 0u32.hash(state); + } else { + self.id.hash(state); + } + } +} + +impl PartialEq for MisdirectedEqKey { + fn eq(&self, other: &Self) -> bool { + if MISDIRECTED_EQ_MODE.with(Cell::get) { + match (self.id, other.id) { + // Under correct behavior, (0, 0) and (1, 1) would return true + // and (0, 1) and (1, 0) would return false. Invert the result + // for these IDs. + (0, 1) | (1, 0) => true, + (0, 0) | (1, 1) => false, + _ => self.id == other.id, + } + } else { + self.id == other.id + } + } +} +impl Eq for MisdirectedEqKey {} + +struct ExactLookupKey { + id: u32, +} + +impl Hash for ExactLookupKey { + fn hash(&self, state: &mut H) { + if self.id <= 1 { + 0u32.hash(state); + } else { + self.id.hash(state); + } + } +} + +impl Equivalent for ExactLookupKey { + fn equivalent(&self, key: &MisdirectedEqKey) -> bool { + // This is always correct. + self.id == key.id + } +} + +#[derive(Debug)] +struct MisdirectedEqIdHashItem { + id: u32, +} + +impl IdHashItem for MisdirectedEqIdHashItem { + type Key<'a> = MisdirectedEqKey; + fn key(&self) -> Self::Key<'_> { + MisdirectedEqKey { id: self.id } + } + id_upcast!(); +} + +#[test] +fn id_hash_misdirected_eq_remove_reinsert_retain_no_aliasing() { + let mut map = IdHashMap::::with_capacity(16); + // Insert two items under correct behavior. + map.insert_unique(MisdirectedEqIdHashItem { id: 0 }).unwrap(); + map.insert_unique(MisdirectedEqIdHashItem { id: 1 }).unwrap(); + + // Now turn on the pathological behavior. + MISDIRECTED_EQ_MODE.with(|c| c.set(true)); + // The old bug was in the internal table cleanup after that lookup. It knew + // the `ItemIndex` for id 1, but searched the hash table again using the + // item's key equality. Under `MisdirectedEqKey`, key 1 matches the table + // entry for key 0 but not itself, so cleanup could remove key 0's table + // entry while removing id 1 from `ItemSet`. + // + // The fixed code uses the key only to compute the hash, then removes the + // table entry whose stored `ItemIndex` is exactly the selected index. + let removed = map.remove(&ExactLookupKey { id: 1 }).unwrap(); + MISDIRECTED_EQ_MODE.with(|c| c.set(false)); + assert_eq!(removed.id, 1); + + map.insert_unique(MisdirectedEqIdHashItem { id: 2 }).unwrap(); + + map.retain(|_| false); + assert!(map.is_empty()); +} + +#[derive(Debug)] +struct MisdirectedEqBiHashItem { + id: u32, +} + +impl BiHashItem for MisdirectedEqBiHashItem { + type K1<'a> = MisdirectedEqKey; + type K2<'a> = u32; + fn key1(&self) -> Self::K1<'_> { + MisdirectedEqKey { id: self.id } + } + fn key2(&self) -> Self::K2<'_> { + self.id + 10 + } + bi_upcast!(); +} + +#[test] +fn bi_hash_misdirected_eq_remove_reinsert_retain_no_aliasing() { + let mut map = BiHashMap::::with_capacity(16); + // Insert two items under correct behavior. + map.insert_unique(MisdirectedEqBiHashItem { id: 0 }).unwrap(); + map.insert_unique(MisdirectedEqBiHashItem { id: 1 }).unwrap(); + + // Now turn on the pathological behavior. + MISDIRECTED_EQ_MODE.with(|c| c.set(true)); + // The old bug was in the internal table cleanup after that lookup. It knew + // the `ItemIndex` for id 1, but searched the hash table again using the + // item's key equality. Under `MisdirectedEqKey`, key 1 matches the table + // entry for key 0 but not itself, so cleanup could remove key 0's table + // entry while removing id 1 from `ItemSet`. + // + // The fixed code uses the key only to compute the hash, then removes the + // table entry whose stored `ItemIndex` is exactly the selected index. + let removed = map.remove1(&ExactLookupKey { id: 1 }).unwrap(); + MISDIRECTED_EQ_MODE.with(|c| c.set(false)); + assert_eq!(removed.id, 1); + + map.insert_unique(MisdirectedEqBiHashItem { id: 2 }).unwrap(); + + map.retain(|_| false); + assert!(map.is_empty()); +} + +#[derive(Debug)] +struct MisdirectedEqTriHashItem { + id: u32, +} + +impl TriHashItem for MisdirectedEqTriHashItem { + type K1<'a> = MisdirectedEqKey; + type K2<'a> = u32; + type K3<'a> = u32; + fn key1(&self) -> Self::K1<'_> { + MisdirectedEqKey { id: self.id } + } + fn key2(&self) -> Self::K2<'_> { + self.id + 10 + } + fn key3(&self) -> Self::K3<'_> { + self.id + 20 + } + tri_upcast!(); +} + +#[test] +fn tri_hash_misdirected_eq_remove_reinsert_retain_no_aliasing() { + let mut map = TriHashMap::::with_capacity(16); + // Insert two items under correct behavior. + map.insert_unique(MisdirectedEqTriHashItem { id: 0 }).unwrap(); + map.insert_unique(MisdirectedEqTriHashItem { id: 1 }).unwrap(); + + // Now turn on the pathological behavior. + MISDIRECTED_EQ_MODE.with(|c| c.set(true)); + // The old bug was in the internal table cleanup after that lookup. It knew + // the `ItemIndex` for id 1, but searched the hash table again using the + // item's key equality. Under `MisdirectedEqKey`, key 1 matches the table + // entry for key 0 but not itself, so cleanup could remove key 0's table + // entry while removing id 1 from `ItemSet`. + // + // The fixed code uses the key only to compute the hash, then removes the + // table entry whose stored `ItemIndex` is exactly the selected index. + let removed = map.remove1(&ExactLookupKey { id: 1 }).unwrap(); + MISDIRECTED_EQ_MODE.with(|c| c.set(false)); + assert_eq!(removed.id, 1); + + map.insert_unique(MisdirectedEqTriHashItem { id: 2 }).unwrap(); + + map.retain(|_| false); + assert!(map.is_empty()); +} + #[derive(Debug)] struct AlwaysEqItem;