diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 22d553fbdcc..3fdc071432c 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -7865,25 +7865,24 @@ async fn cmd_db_vmm_info( include_sled_id: bool, ) { use db::model::ByteCount; - let db::model::SledResourceVmm { - id: _, - sled_id, - resources: - db::model::Resources { - hardware_threads, - rss_ram: ByteCount(rss), - reservoir_ram: ByteCount(reservoir), - }, - instance_id: _, - } = resource; + + let sled_id = &resource.sled_id; + let db::model::Resources { + hardware_threads, + rss_ram: ByteCount(rss), + reservoir_ram: ByteCount(reservoir), + } = &resource.resources; + const SLED_ID: &'static str = "sled ID"; const THREADS: &'static str = "hardware threads"; const RSS: &'static str = "RSS RAM"; const RESERVOIR: &'static str = "reservoir RAM"; const WIDTH: usize = const_max_len(&[SLED_ID, THREADS, RSS, RESERVOIR]); + if include_sled_id { println!(" {SLED_ID:>WIDTH$}: {sled_id}"); } + println!(" {THREADS:>WIDTH$}: {hardware_threads}"); println!(" {RSS:>WIDTH$}: {rss}"); println!(" {RESERVOIR:>WIDTH$}: {reservoir}"); diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 5caf71e9e28..6cdc3fac1b4 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(261, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(262, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ pub static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(262, "sled-resource-vmm-instance-state-generation"), KnownVersion::new(261, "remove-add-zones-with-mupdate-override"), KnownVersion::new(260, "ereport-trim-serial-trailing-nulls"), KnownVersion::new(259, "vmm-failure-reason"), diff --git a/nexus/db-model/src/sled_resource_vmm.rs b/nexus/db-model/src/sled_resource_vmm.rs index c64716ff1b5..1dab83d85cd 100644 --- a/nexus/db-model/src/sled_resource_vmm.rs +++ b/nexus/db-model/src/sled_resource_vmm.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use crate::Generation; use crate::typed_uuid::DbTypedUuid; use crate::{ByteCount, SqlU32}; use nexus_db_schema::schema::sled_resource_vmm; @@ -34,6 +35,14 @@ impl Resources { } } +#[derive(Clone, Debug)] +pub enum SledResourceVmmInstanceStateGeneration { + /// Row was created prior to Nexus recording this + Unspecified, + + Specified(Generation), +} + /// Describes sled resource usage by a VMM #[derive(Clone, Selectable, Queryable, Insertable, Debug)] #[diesel(table_name = sled_resource_vmm)] @@ -45,6 +54,9 @@ pub struct SledResourceVmm { pub resources: Resources, pub instance_id: Option, + + /// The instance's state_generation at the moment of reservation + instance_state_generation: Option, } impl SledResourceVmm { @@ -53,12 +65,14 @@ impl SledResourceVmm { instance_id: InstanceUuid, sled_id: SledUuid, resources: Resources, + instance_state_generation: Generation, ) -> Self { Self { id: id.into(), instance_id: Some(instance_id.into()), sled_id: sled_id.into(), resources, + instance_state_generation: Some(instance_state_generation), } } @@ -73,4 +87,16 @@ impl SledResourceVmm { pub fn instance_id(&self) -> Option { self.instance_id.map(|x| x.into()) } + + pub fn instance_state_generation( + &self, + ) -> SledResourceVmmInstanceStateGeneration { + match &self.instance_state_generation { + Some(generation) => { + SledResourceVmmInstanceStateGeneration::Specified(*generation) + } + + None => SledResourceVmmInstanceStateGeneration::Unspecified, + } + } } diff --git a/nexus/db-queries/benches/harness/db_utils.rs b/nexus/db-queries/benches/harness/db_utils.rs index 390568fb114..5d76ab9f4d7 100644 --- a/nexus/db-queries/benches/harness/db_utils.rs +++ b/nexus/db-queries/benches/harness/db_utils.rs @@ -73,6 +73,7 @@ pub async fn create_reservation( .sled_reservation_create( &opctx, instance_id, + nexus_db_model::Generation::new(), vmm_id, small_resource_request(), SledReservationConstraintBuilder::new().build(), diff --git a/nexus/db-queries/src/db/datastore/affinity.rs b/nexus/db-queries/src/db/datastore/affinity.rs index 7689ec06e39..c52afb6fcd2 100644 --- a/nexus/db-queries/src/db/datastore/affinity.rs +++ b/nexus/db-queries/src/db/datastore/affinity.rs @@ -1314,6 +1314,7 @@ mod tests { ByteCount::from_kibibytes_u32(1).into(), ByteCount::from_kibibytes_u32(1).into(), ), + nexus_db_model::Generation::new(), )) .execute_async( &*datastore.pool_connection_for_tests().await.unwrap(), diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 36872c074c6..29acaaa33bb 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -101,6 +101,8 @@ enum SledReservationError { group membership." )] RequiredAffinitySledNotValid, + #[error("Instance reservation already made for generation {generation}")] + ReservationExists { generation: external::Generation }, } impl From for external::Error { @@ -129,6 +131,11 @@ impl From for external::Error { | SledReservationError::ConflictingAntiAndAffinityConstraints => { external::Error::invalid_request(&msg) }, + // A concurrent request to place the same instance at the same + // generation number landed already. + SledReservationError::ReservationExists { generation: _ } => { + external::Error::conflict(&msg) + } } } } @@ -658,6 +665,11 @@ impl<'a> Iterator for CompleteLocalStorageAllocationLists<'a> { } } +/// This constraint prevents an instance from having multiple sled_resource_vmm +/// records for the same generation number. +const SINGLE_RESERVATION_CONSTRAINT: &'static str = + "single_vmm_reservation_per_generation"; + impl DataStore { /// Stores a new sled in the database. /// @@ -850,6 +862,7 @@ impl DataStore { &self, opctx: &OpContext, instance_id: InstanceUuid, + instance_state_generation: db::model::Generation, propolis_id: PropolisUuid, resources: db::model::Resources, constraints: db::model::SledReservationConstraints, @@ -857,6 +870,7 @@ impl DataStore { self.sled_reservation_create_inner( opctx, instance_id, + instance_state_generation, propolis_id, resources, constraints, @@ -875,6 +889,7 @@ impl DataStore { &self, opctx: &OpContext, instance_id: InstanceUuid, + instance_state_generation: db::model::Generation, propolis_id: PropolisUuid, resources: db::model::Resources, constraints: db::model::SledReservationConstraints, @@ -883,6 +898,7 @@ impl DataStore { let log = opctx.log.new(o!( "query" => "sled_reservation", "instance_id" => instance_id.to_string(), + "instance_state_generation" => instance_state_generation.to_string(), "propolis_id" => propolis_id.to_string(), )); @@ -890,22 +906,29 @@ impl DataStore { let conn = self.pool_connection_authorized(opctx).await?; - // Check if resource ID already exists - if so, return it. + // Check if resource with a matching propolis ID already exists - if so, + // return it. // // This check makes this function idempotent. Beyond this point, however // we rely on primary key constraints in the database to prevent // concurrent reservations for same propolis_id. use nexus_db_schema::schema::sled_resource_vmm::dsl as resource_dsl; - let old_resource = resource_dsl::sled_resource_vmm + let existing_resource = resource_dsl::sled_resource_vmm .filter(resource_dsl::id.eq(*propolis_id.as_untyped_uuid())) .select(SledResourceVmm::as_select()) - .limit(1) - .load_async(&*conn) - .await?; + .get_result_async(&*conn) + .await + .optional()?; - if !old_resource.is_empty() { - info!(&log, "sled reservation already occurred, returning"); - return Ok(old_resource[0].clone()); + if let Some(existing_resource) = existing_resource { + info!( + &log, + "existing sled reservation for this instance at generation \ + {:?}", + existing_resource.instance_state_generation(), + ); + + return Ok(existing_resource); } // Get a list of local storage disks attached to this instance @@ -1176,6 +1199,7 @@ impl DataStore { instance_id, sled_target, resources.clone(), + instance_state_generation, ); if !local_storage_allocation_required { @@ -1206,6 +1230,24 @@ impl DataStore { } } + Err(diesel::result::Error::DatabaseError( + diesel::result::DatabaseErrorKind::UniqueViolation, + error_info, + )) if error_info.constraint_name() + == Some(SINGLE_RESERVATION_CONSTRAINT) => + { + // The table already has a reservation for this instance + // id and generation number. + return Err( + SledReservationTransactionError::Reservation( + SledReservationError::ReservationExists { + generation: instance_state_generation + .into(), + }, + ), + ); + } + Err(e) => { if let Some(sentinel) = matches_sentinel(&e, &SLED_INSERT_QUERY_SENTINELS) @@ -1341,6 +1383,24 @@ impl DataStore { } } + Err(diesel::result::Error::DatabaseError( + diesel::result::DatabaseErrorKind::UniqueViolation, + error_info, + )) if error_info.constraint_name() + == Some(SINGLE_RESERVATION_CONSTRAINT) => + { + // The table already has a reservation for this + // instance id and generation number. + return Err( + SledReservationTransactionError::Reservation( + SledReservationError::ReservationExists { + generation: instance_state_generation + .into(), + }, + ), + ); + } + Err(e) => { if let Some(sentinel) = matches_sentinel( &e, @@ -2124,6 +2184,7 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( &opctx, InstanceUuid::new_v4(), + db::model::Generation::new(), PropolisUuid::new_v4(), resources.clone(), constraints, @@ -2145,6 +2206,7 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( &opctx, InstanceUuid::new_v4(), + db::model::Generation::new(), PropolisUuid::new_v4(), resources.clone(), constraints, @@ -2336,6 +2398,26 @@ pub(in crate::db::datastore) mod test { } } + fn from_local_storage_test_instance( + local_storage_test_instance: &LocalStorageTestInstance, + ) -> Self { + Self { + id: local_storage_test_instance.id, + groups: vec![], + force_onto_sled: None, + resources: db::model::Resources::new( + local_storage_test_instance.ncpus.into(), + local_storage_test_instance.memory.into(), + local_storage_test_instance.memory.into(), + ), + cpu_platform: None, + } + } + + fn resources(&self) -> db::model::Resources { + self.resources.clone() + } + // This is the first half of creating a sled reservation. // It can be called during tests trying to invoke contention manually. async fn find_targets( @@ -2377,6 +2459,7 @@ pub(in crate::db::datastore) mod test { datastore: &DataStore, propolis_id: PropolisUuid, sled_id: SledUuid, + state_generation: db::model::Generation, ) -> bool { assert!(self.force_onto_sled.is_none()); @@ -2385,6 +2468,7 @@ pub(in crate::db::datastore) mod test { self.id, sled_id, self.resources.clone(), + state_generation, ); let conn = datastore.pool_connection_for_tests().await.unwrap(); @@ -2490,6 +2574,7 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create_inner( &opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), instance.resources.clone(), constraints.build(), @@ -3305,6 +3390,7 @@ pub(in crate::db::datastore) mod test { &datastore, PropolisUuid::new_v4(), sleds[i].id(), + Generation::new(), ) .await, "Shouldn't have been able to insert into sled {i}" @@ -3318,6 +3404,7 @@ pub(in crate::db::datastore) mod test { &datastore, PropolisUuid::new_v4(), sleds[0].id(), + Generation::new(), ) .await ); @@ -3405,6 +3492,7 @@ pub(in crate::db::datastore) mod test { &datastore, PropolisUuid::new_v4(), sleds[0].id(), + Generation::new(), ) .await, "Shouldn't have been able to insert into sleds[0]" @@ -3417,6 +3505,7 @@ pub(in crate::db::datastore) mod test { &datastore, PropolisUuid::new_v4(), sleds[1].id(), + Generation::new(), ) .await ); @@ -3489,6 +3578,7 @@ pub(in crate::db::datastore) mod test { &datastore, PropolisUuid::new_v4(), sleds[i].id(), + Generation::new(), ) .await, "Shouldn't have been able to insert into sleds[i]" @@ -3502,6 +3592,7 @@ pub(in crate::db::datastore) mod test { &datastore, PropolisUuid::new_v4(), sleds[1].id(), + Generation::new(), ) .await ); @@ -3992,6 +4083,8 @@ pub(in crate::db::datastore) mod test { struct LocalStorageTestInstance { id: InstanceUuid, name: String, + ncpus: u16, + memory: external::ByteCount, affinity: Option<(Affinity, usize)>, disks: Vec, } @@ -4021,7 +4114,7 @@ pub(in crate::db::datastore) mod test { is_scrimlet: false, usable_hardware_threads: 128, usable_physical_ram: (64 << 30).try_into().unwrap(), - reservoir_size: (16 << 30).try_into().unwrap(), + reservoir_size: (56 << 30).try_into().unwrap(), cpu_family: SledCpuFamily::AmdMilan, }, Uuid::new_v4(), @@ -4138,6 +4231,8 @@ pub(in crate::db::datastore) mod test { &authz_project, instance.id, &instance.name.as_str(), + instance.ncpus, + instance.memory, ) .await; @@ -4395,6 +4490,8 @@ pub(in crate::db::datastore) mod test { authz_project: &authz::Project, instance_id: InstanceUuid, name: &str, + ncpus: u16, + memory: external::ByteCount, ) -> authz::Instance { datastore .project_create_instance( @@ -4408,8 +4505,8 @@ pub(in crate::db::datastore) mod test { name: name.parse().unwrap(), description: "It's an instance".into(), }, - ncpus: 2i64.try_into().unwrap(), - memory: external::ByteCount::from_gibibytes_u32(16), + ncpus: external::InstanceCpuCount(ncpus), + memory, hostname: "myhostname".try_into().unwrap(), user_data: Vec::new(), network_interfaces: @@ -4567,6 +4664,8 @@ pub(in crate::db::datastore) mod test { instances: vec![LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![LocalStorageTestInstanceDisk { id: Uuid::new_v4(), @@ -4578,7 +4677,8 @@ pub(in crate::db::datastore) mod test { }; setup_local_storage_allocation_test(&opctx, datastore, &config).await; - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); // the output of the find targets query does not currently take required // local storage allocations into account @@ -4589,12 +4689,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -4672,6 +4769,8 @@ pub(in crate::db::datastore) mod test { instances: vec![LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![LocalStorageTestInstanceDisk { id: Uuid::new_v4(), @@ -4683,7 +4782,8 @@ pub(in crate::db::datastore) mod test { }; setup_local_storage_allocation_test(&opctx, datastore, &config).await; - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); // Add an allocation for this disk to the first sled's zpool set_local_storage_unencrypted_allocation( @@ -4707,12 +4807,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -4764,6 +4861,8 @@ pub(in crate::db::datastore) mod test { instances: vec![LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![LocalStorageTestInstanceDisk { id: Uuid::new_v4(), @@ -4775,7 +4874,8 @@ pub(in crate::db::datastore) mod test { }; setup_local_storage_allocation_test(&opctx, datastore, &config).await; - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); set_local_storage_unencrypted_dataset_no_provision( datastore, @@ -4792,12 +4892,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -4847,6 +4944,8 @@ pub(in crate::db::datastore) mod test { instances: vec![LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![LocalStorageTestInstanceDisk { id: Uuid::new_v4(), @@ -4858,7 +4957,8 @@ pub(in crate::db::datastore) mod test { }; setup_local_storage_allocation_test(&opctx, datastore, &config).await; - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); // The zpool size is 1 TiB, and the control plane buffer is 250 GiB. If // we set a crucible dataset size_used of 300 GiB, then ensure a 512 GiB @@ -4880,12 +4980,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -4903,12 +5000,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -4960,6 +5054,8 @@ pub(in crate::db::datastore) mod test { instances: vec![LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![ LocalStorageTestInstanceDisk { @@ -4979,7 +5075,8 @@ pub(in crate::db::datastore) mod test { }; setup_local_storage_allocation_test(&opctx, datastore, &config).await; - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); // the output of the find targets query does not currently take required // local storage allocations into account @@ -4990,12 +5087,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -5067,6 +5161,8 @@ pub(in crate::db::datastore) mod test { instances: vec![LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![ LocalStorageTestInstanceDisk { @@ -5086,7 +5182,8 @@ pub(in crate::db::datastore) mod test { }; setup_local_storage_allocation_test(&opctx, datastore, &config).await; - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); // the output of the find targets query does not currently take required // local storage allocations into account @@ -5097,12 +5194,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -5178,6 +5272,8 @@ pub(in crate::db::datastore) mod test { instances: vec![LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![ LocalStorageTestInstanceDisk { @@ -5197,7 +5293,8 @@ pub(in crate::db::datastore) mod test { }; setup_local_storage_allocation_test(&opctx, datastore, &config).await; - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); // The zpool size is 1 TiB, and the control plane buffer is 250 GiB. If // we set the first U2's crucible dataset size_used of 300 GiB, then @@ -5219,12 +5316,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -5275,12 +5369,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -5356,6 +5447,8 @@ pub(in crate::db::datastore) mod test { instances: vec![LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![ LocalStorageTestInstanceDisk { @@ -5375,7 +5468,8 @@ pub(in crate::db::datastore) mod test { }; setup_local_storage_allocation_test(&opctx, datastore, &config).await; - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); // One of the local storage have been allocated already. @@ -5399,12 +5493,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -5481,6 +5572,8 @@ pub(in crate::db::datastore) mod test { LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 32, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![ LocalStorageTestInstanceDisk { @@ -5504,6 +5597,8 @@ pub(in crate::db::datastore) mod test { LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local2".to_string(), + ncpus: 32, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![ LocalStorageTestInstanceDisk { @@ -5530,7 +5625,8 @@ pub(in crate::db::datastore) mod test { setup_local_storage_allocation_test(&opctx, datastore, &config).await; for instance in &config.instances { - let instance = Instance::new_with_id(instance.id); + let instance = + Instance::from_local_storage_test_instance(&instance); // the output of the find targets query does not currently take // required local storage allocations into account @@ -5540,12 +5636,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 32, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -5623,6 +5716,8 @@ pub(in crate::db::datastore) mod test { instances: vec![LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![ LocalStorageTestInstanceDisk { @@ -5642,7 +5737,8 @@ pub(in crate::db::datastore) mod test { }; setup_local_storage_allocation_test(&opctx, datastore, &config).await; - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); // One of the local storage have been allocated already to the first U2 @@ -5677,12 +5773,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -5756,6 +5849,9 @@ pub(in crate::db::datastore) mod test { config.instances.push(LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: format!("inst{i}"), + // consume all the available threads + ncpus: 128, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks, }); @@ -5766,7 +5862,8 @@ pub(in crate::db::datastore) mod test { let mut vmms = vec![]; for (i, config_instance) in config.instances.iter().enumerate() { - let instance = Instance::new_with_id(config_instance.id); + let instance = + Instance::from_local_storage_test_instance(&config_instance); // the output of the find targets query does not currently take // required local storage allocations into account, but each VMM @@ -5777,12 +5874,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 128, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -5867,6 +5961,9 @@ pub(in crate::db::datastore) mod test { config.instances.push(LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: format!("inst{i}"), + // consume all the available threads + ncpus: 128, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks, }); @@ -5882,7 +5979,9 @@ pub(in crate::db::datastore) mod test { let mut jhs = vec![]; for (i, config_instance) in chunk.iter().enumerate() { - let instance = Instance::new_with_id(config_instance.id); + let instance = Instance::from_local_storage_test_instance( + &config_instance, + ); let datastore = datastore.clone(); @@ -5896,12 +5995,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create_inner( &opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 128, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -5928,6 +6024,170 @@ pub(in crate::db::datastore) mod test { logctx.cleanup_successful(); } + /// Ensure that a full rack can have one VMM take all the U.2s on each sled, + /// where the sled reservations happen concurrently in chunks, and multiple + /// requests for the same instance occur. + #[tokio::test] + async fn local_storage_allocation_full_rack_concurrent_multi() { + let logctx = dev::test_setup_log( + "local_storage_allocation_full_rack_concurrent_multi", + ); + let db = TestDatabase::new_with_datastore(&logctx.log).await; + let (opctx, datastore) = (db.opctx(), db.datastore()); + + let mut config = LocalStorageTest { + sleds: vec![], + affinity_groups: vec![], + anti_affinity_groups: vec![], + instances: vec![], + }; + + const MAX_U2_PER_INSTANCE: usize = 10; + + for i in 0..32 { + let mut u2s = vec![]; + + for n in 0..MAX_U2_PER_INSTANCE { + u2s.push(LocalStorageTestSledU2 { + physical_disk_id: PhysicalDiskUuid::new_v4(), + physical_disk_serial: format!("phys_{i}_{n}"), + + zpool_id: ZpoolUuid::new_v4(), + control_plane_storage_buffer: + external::ByteCount::from_gibibytes_u32(250), + + inventory_total_size: + external::ByteCount::from_gibibytes_u32(1024), + + crucible_dataset_id: DatasetUuid::new_v4(), + crucible_dataset_addr: format!( + "[fd00:1122:3344:{i}{n:02x}::1]:12345" + ) + .parse() + .unwrap(), + + local_storage_unencrypted_dataset_id: DatasetUuid::new_v4(), + }); + } + + config.sleds.push(LocalStorageTestSled { + sled_id: SledUuid::new_v4(), + sled_serial: format!("sled_{i}"), + u2s, + }); + + let mut disks = vec![]; + + for n in 0..MAX_U2_PER_INSTANCE { + disks.push(LocalStorageTestInstanceDisk { + id: Uuid::new_v4(), + name: external::Name::try_from(format!("local-{i}-{n}")) + .unwrap(), + size: external::ByteCount::from_gibibytes_u32(512), + }); + } + + config.instances.push(LocalStorageTestInstance { + id: InstanceUuid::new_v4(), + name: format!("inst{i}"), + // consume all the available threads + ncpus: 128, + memory: external::ByteCount::from_gibibytes_u32(16), + affinity: None, + disks, + }); + } + + setup_local_storage_allocation_test(&opctx, datastore, &config).await; + + let mut vmms = vec![]; + + let mut success = 0; + + // Reserve the instances concurrently in chunks - 4 at a time, because + // qorb currently supports a max of 8, and we're issuing the same + // request twice. + for chunk in config.instances.chunks(4) { + let mut jhs = vec![]; + + for (i, config_instance) in chunk.iter().enumerate() { + let instance = Instance::from_local_storage_test_instance( + &config_instance, + ); + + // Issue the same request twice + + for _ in 0..2 { + let datastore = datastore.clone(); + + let opctx = opctx.child(BTreeMap::from([( + String::from("task"), + format!("{i}"), + )])); + + let jh = tokio::spawn({ + let resources = instance.resources(); + + async move { + datastore + .sled_reservation_create_inner( + &opctx, + instance.id, + db::model::Generation::new(), + PropolisUuid::new_v4(), + resources, + db::model::SledReservationConstraints::none( + ), + ) + .await + } + }); + + jhs.push(jh); + } + } + + for jh in jhs { + let vmm = match jh.await.unwrap() { + Ok(vmm) => { + success += 1; + vmm + } + + Err(SledReservationTransactionError::Reservation( + SledReservationError::NotFound, + )) => { + // eat this, double check number of VMMs later + continue; + } + + Err(SledReservationTransactionError::Reservation( + SledReservationError::ReservationExists { generation }, + )) if generation == external::Generation::new() => { + // eat this, it's expected due to concurrent requests + continue; + } + + Err(e) => panic!("{e:?}"), + }; + + vmms.push(vmm); + } + } + + assert_eq!(success, 32); + + let sleds: HashSet<_> = + vmms.into_iter().map(|vmm| vmm.sled_id).collect(); + + assert_eq!(sleds.len(), 32); + + validate_local_storage_allocations(&datastore).await; + + db.terminate().await; + logctx.cleanup_successful(); + } + /// Ensure that four sleds can have _many_ VMMs (enough to fully use all /// cpus) allocate local storage, where the sled reservations happen /// concurrently in chunks. @@ -6008,6 +6268,10 @@ pub(in crate::db::datastore) mod test { config.instances.push(LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: format!("inst{i}"), + ncpus: 2, + // 64 instances per sled, where each sled has 56 GB of reservoir + // RAM = 875 MB per instance + memory: external::ByteCount::from_mebibytes_u32(512), affinity: None, disks, }); @@ -6023,7 +6287,9 @@ pub(in crate::db::datastore) mod test { let mut jhs = vec![]; for (i, config_instance) in chunk.iter().enumerate() { - let instance = Instance::new_with_id(config_instance.id); + let instance = Instance::from_local_storage_test_instance( + &config_instance, + ); let datastore = datastore.clone(); @@ -6037,12 +6303,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create_inner( &opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 2, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -6143,6 +6406,8 @@ pub(in crate::db::datastore) mod test { LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: Some((Affinity::Positive, 0)), disks: vec![LocalStorageTestInstanceDisk { id: Uuid::new_v4(), @@ -6154,6 +6419,8 @@ pub(in crate::db::datastore) mod test { LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local2".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: Some((Affinity::Positive, 0)), disks: vec![LocalStorageTestInstanceDisk { id: Uuid::new_v4(), @@ -6170,7 +6437,8 @@ pub(in crate::db::datastore) mod test { // The first instance's sled reservation should succeed, there's enough // space for the disk - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); // the output of the find targets query does not currently take required // local storage allocations into account @@ -6181,12 +6449,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -6196,7 +6461,8 @@ pub(in crate::db::datastore) mod test { // the affinity group's policy is set to Fail, and there isn't enough // space for the second instance's disk on the one sled. - let instance = Instance::new_with_id(config.instances[1].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[1]); // the output of the find targets query does not currently take required // local storage allocations into account @@ -6206,12 +6472,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -6270,6 +6533,8 @@ pub(in crate::db::datastore) mod test { LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: Some((Affinity::Negative, 0)), disks: vec![LocalStorageTestInstanceDisk { id: Uuid::new_v4(), @@ -6281,6 +6546,8 @@ pub(in crate::db::datastore) mod test { LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local2".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: Some((Affinity::Negative, 0)), disks: vec![LocalStorageTestInstanceDisk { id: Uuid::new_v4(), @@ -6296,7 +6563,8 @@ pub(in crate::db::datastore) mod test { // The first instance's sled reservation should succeed - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); // the output of the find targets query does not currently take required // local storage allocations into account @@ -6307,12 +6575,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -6323,7 +6588,8 @@ pub(in crate::db::datastore) mod test { // The second instance's sled reservation should not succeed, because // the anti-affinity group's policy is set to Fail. - let instance = Instance::new_with_id(config.instances[1].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[1]); // the output of the find targets query does not currently take required // local storage allocations into account @@ -6333,12 +6599,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -6435,6 +6698,8 @@ pub(in crate::db::datastore) mod test { instances: vec![LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![ LocalStorageTestInstanceDisk { @@ -6459,7 +6724,8 @@ pub(in crate::db::datastore) mod test { // succeed, as it needs two local storage allocations (which it could // get if it was allowed to use sled_1). - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); // the output of the find targets query does not currently take required // local storage allocations into account @@ -6474,12 +6740,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), constraints, ) .await @@ -6554,6 +6817,8 @@ pub(in crate::db::datastore) mod test { instances: vec![LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local".to_string(), + ncpus: 2, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![ LocalStorageTestInstanceDisk { @@ -6574,7 +6839,8 @@ pub(in crate::db::datastore) mod test { setup_local_storage_allocation_test(&opctx, datastore, &config).await; - let instance = Instance::new_with_id(config.instances[0].id); + let instance = + Instance::from_local_storage_test_instance(&config.instances[0]); // Detach the second disk from the instance let (.., authz_instance) = LookupPath::new(&opctx, datastore) @@ -6602,12 +6868,9 @@ pub(in crate::db::datastore) mod test { .sled_reservation_create( opctx, instance.id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 1, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + instance.resources(), db::model::SledReservationConstraints::none(), ) .await @@ -6687,6 +6950,9 @@ pub(in crate::db::datastore) mod test { id: InstanceUuid::new_v4(), name: "local".to_string(), affinity: None, + // consume _almost_ all available threads + ncpus: 96, + memory: external::ByteCount::from_gibibytes_u32(16), disks: vec![LocalStorageTestInstanceDisk { id: Uuid::new_v4(), name: external::Name::try_from("local".to_string()) @@ -6697,6 +6963,9 @@ pub(in crate::db::datastore) mod test { LocalStorageTestInstance { id: InstanceUuid::new_v4(), name: "local2".to_string(), + // consume _almost_ all available threads + ncpus: 96, + memory: external::ByteCount::from_gibibytes_u32(16), affinity: None, disks: vec![LocalStorageTestInstanceDisk { id: Uuid::new_v4(), @@ -6717,19 +6986,21 @@ pub(in crate::db::datastore) mod test { let datastore = db.datastore().clone(); let opctx = OpContext::for_tests(logctx.log.clone(), datastore.clone()); - let instance_id = config.instances[0].id; + + let instance = Instance::from_local_storage_test_instance( + &config.instances[0], + ); + let instance_id = instance.id; + let resources = instance.resources(); async move { datastore .sled_reservation_create( &opctx, instance_id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 96, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + resources, db::model::SledReservationConstraints::none(), ) .await @@ -6740,19 +7011,21 @@ pub(in crate::db::datastore) mod test { let datastore = db.datastore().clone(); let opctx = OpContext::for_tests(logctx.log.clone(), datastore.clone()); - let instance_id = config.instances[1].id; + + let instance = Instance::from_local_storage_test_instance( + &config.instances[1], + ); + let instance_id = instance.id; + let resources = instance.resources(); async move { datastore .sled_reservation_create( &opctx, instance_id, + db::model::Generation::new(), PropolisUuid::new_v4(), - db::model::Resources::new( - 96, - ByteCount::try_from(1024).unwrap(), - ByteCount::try_from(1024).unwrap(), - ), + resources, db::model::SledReservationConstraints::none(), ) .await diff --git a/nexus/db-queries/src/db/queries/sled_reservation.rs b/nexus/db-queries/src/db/queries/sled_reservation.rs index cdd1c3968d2..1c39150d6d2 100644 --- a/nexus/db-queries/src/db/queries/sled_reservation.rs +++ b/nexus/db-queries/src/db/queries/sled_reservation.rs @@ -6,6 +6,7 @@ use crate::db::model::Resources; use crate::db::model::SledResourceVmm; +use crate::db::model::SledResourceVmmInstanceStateGeneration; use crate::db::raw_query_builder::QueryBuilder; use crate::db::raw_query_builder::TrustedStr; use crate::db::raw_query_builder::TypedSqlQuery; @@ -833,13 +834,14 @@ pub fn sled_insert_resource_query( // Finally, perform the INSERT if it's still valid. query.sql(" - INSERT INTO sled_resource_vmm (id, sled_id, hardware_threads, rss_ram, reservoir_ram, instance_id) + INSERT INTO sled_resource_vmm (id, sled_id, hardware_threads, rss_ram, reservoir_ram, instance_id, instance_state_generation) SELECT ").param().sql(", ").param().sql(", ").param().sql(", ").param().sql(", ").param().sql(", + ").param().sql(", ").param().sql(" WHERE EXISTS(SELECT 1 FROM insert_valid) ") @@ -848,7 +850,14 @@ pub fn sled_insert_resource_query( .bind::(resource.resources.hardware_threads) .bind::(resource.resources.rss_ram) .bind::(resource.resources.reservoir_ram) - .bind::(instance_id); + .bind::(instance_id) + .bind::, _>( + match resource.instance_state_generation() { + SledResourceVmmInstanceStateGeneration::Specified(generation) => + Some(generation), + SledResourceVmmInstanceStateGeneration::Unspecified => None, + } + ); query.query() } @@ -950,6 +959,7 @@ mod test { external::ByteCount::from_gibibytes_u32(0), ), ), + model::Generation::new(), ); // with no local storage @@ -1022,6 +1032,7 @@ mod test { external::ByteCount::from_gibibytes_u32(0), ), ), + model::Generation::new(), ); let query = sled_insert_resource_query( diff --git a/nexus/db-queries/tests/output/sled_insert_resource_query.sql b/nexus/db-queries/tests/output/sled_insert_resource_query.sql index 354925ee63e..f6f40a15f28 100644 --- a/nexus/db-queries/tests/output/sled_insert_resource_query.sql +++ b/nexus/db-queries/tests/output/sled_insert_resource_query.sql @@ -116,8 +116,9 @@ WITH ) INSERT INTO - sled_resource_vmm (id, sled_id, hardware_threads, rss_ram, reservoir_ram, instance_id) + sled_resource_vmm + (id, sled_id, hardware_threads, rss_ram, reservoir_ram, instance_id, instance_state_generation) SELECT - $11, $12, $13, $14, $15, $16 + $11, $12, $13, $14, $15, $16, $17 WHERE EXISTS(SELECT 1 FROM insert_valid) diff --git a/nexus/db-queries/tests/output/sled_insert_resource_query_with_local_storage.sql b/nexus/db-queries/tests/output/sled_insert_resource_query_with_local_storage.sql index f1d6a6b6643..de1641894bf 100644 --- a/nexus/db-queries/tests/output/sled_insert_resource_query_with_local_storage.sql +++ b/nexus/db-queries/tests/output/sled_insert_resource_query_with_local_storage.sql @@ -298,8 +298,9 @@ WITH ) INSERT INTO - sled_resource_vmm (id, sled_id, hardware_threads, rss_ram, reservoir_ram, instance_id) + sled_resource_vmm + (id, sled_id, hardware_threads, rss_ram, reservoir_ram, instance_id, instance_state_generation) SELECT - $45, $46, $47, $48, $49, $50 + $45, $46, $47, $48, $49, $50, $51 WHERE EXISTS(SELECT 1 FROM insert_valid) diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index 05d6c96b1db..6ca7893674a 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1103,6 +1103,7 @@ table! { rss_ram -> Int8, reservoir_ram -> Int8, instance_id -> Nullable, + instance_state_generation -> Nullable, } } diff --git a/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs b/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs index 40d6c04b2d7..76b3d67369f 100644 --- a/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs +++ b/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs @@ -196,6 +196,7 @@ mod tests { use super::*; use chrono::Utc; + use nexus_db_lookup::LookupPath; use nexus_db_model::ByteCount; use nexus_db_model::Generation; use nexus_db_model::Resources; @@ -203,6 +204,7 @@ mod tests { use nexus_db_model::Vmm; use nexus_db_model::VmmCpuPlatform; use nexus_db_model::VmmState; + use nexus_db_queries::authz; use nexus_test_utils::resource_helpers; use nexus_test_utils_macros::nexus_test; use omicron_uuid_kinds::InstanceUuid; @@ -255,19 +257,29 @@ mod tests { ) .await .expect("destroyed vmm record should be created successfully"); + let resources = Resources::new( 1, // Just require the bare non-zero amount of RAM. ByteCount::try_from(1024).unwrap(), ByteCount::try_from(1024).unwrap(), ); + let constraints = nexus_db_model::SledReservationConstraints::none(); + + let (.., db_instance) = LookupPath::new(&opctx, datastore) + .instance_id(instance.identity.id) + .fetch_for(authz::Action::Read) + .await + .unwrap(); + dbg!( datastore .sled_reservation_create( &opctx, InstanceUuid::from_untyped_uuid(instance.identity.id), + db_instance.state_generation, destroyed_vmm_id, resources.clone(), constraints, @@ -275,6 +287,7 @@ mod tests { .await .expect("sled reservation should be created successfully") ); + Self { destroyed_vmm_id } } diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs index 2b0e32d6629..c3be63e3674 100644 --- a/nexus/src/app/sagas/instance_common.rs +++ b/nexus/src/app/sagas/instance_common.rs @@ -12,8 +12,9 @@ use crate::app::instance_network::InstanceNetworkFilters; use http::StatusCode; use nexus_db_lookup::LookupPath; use nexus_db_model::{ - ByteCount, ExternalIp, InstanceState, IpAttachState, IpNet, NatEntry, - SledReservationConstraints, SledResourceVmm, VmmCpuPlatform, VmmState, + ByteCount, ExternalIp, Generation, InstanceState, IpAttachState, IpNet, + NatEntry, SledReservationConstraints, SledResourceVmm, VmmCpuPlatform, + VmmState, }; use nexus_db_queries::authz; use nexus_db_queries::db::datastore::ExternalSubnetBeginOpResult; @@ -42,6 +43,7 @@ pub(super) struct VmmAndSledIds { pub async fn reserve_vmm_resources( nexus: &Nexus, instance_id: InstanceUuid, + instance_state_generation: Generation, propolis_id: PropolisUuid, ncpus: u32, guest_memory: ByteCount, @@ -74,6 +76,7 @@ pub async fn reserve_vmm_resources( let resource = nexus .reserve_on_random_sled( instance_id, + instance_state_generation, propolis_id, resources, constraints, diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index a713a8335d3..aad57559d87 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -237,6 +237,7 @@ async fn sim_reserve_sled_resources( let resource = super::instance_common::reserve_vmm_resources( osagactx.nexus(), InstanceUuid::from_untyped_uuid(params.instance.id()), + params.instance.state_generation, propolis_id, u32::from(params.instance.ncpus.0.0), params.instance.memory, diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 0899ffcd474..22f5e4ad110 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -231,6 +231,7 @@ async fn sis_alloc_server( let resource = super::instance_common::reserve_vmm_resources( osagactx.nexus(), InstanceUuid::from_untyped_uuid(params.db_instance.id()), + params.db_instance.state_generation, propolis_id, u32::from(hardware_threads.0), reservoir_ram, diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index d2ee2f40c44..5732f7e0e7a 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -211,6 +211,7 @@ impl super::Nexus { pub(crate) async fn reserve_on_random_sled( &self, instance_id: InstanceUuid, + instance_state_generation: db::model::Generation, propolis_id: PropolisUuid, resources: db::model::Resources, constraints: db::model::SledReservationConstraints, @@ -219,6 +220,7 @@ impl super::Nexus { .sled_reservation_create( &self.opctx_alloc, instance_id, + instance_state_generation, propolis_id, resources, constraints, diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 308379c85c5..61138906145 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -307,7 +307,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled_resource_vmm ( -- If we tried to backfill + make this column non-nullable while that saga -- was mid-execution, we would still have some rows in this table with nullable -- values that would be more complex to fix. - instance_id UUID + instance_id UUID, + + -- The instance's state generation number at the moment of reservation + instance_state_generation INT ); -- Allow looking up all VMM resources which reside on a sled @@ -316,6 +319,12 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_vmm_resource_by_sled ON omicron.public. id ); +-- Allow a single VMM reservation per instance state generation +CREATE UNIQUE INDEX IF NOT EXISTS single_vmm_reservation_per_generation ON omicron.public.sled_resource_vmm ( + instance_id, + instance_state_generation +); + -- Allow looking up all resources by instance CREATE INDEX IF NOT EXISTS lookup_vmm_resource_by_instance ON omicron.public.sled_resource_vmm ( instance_id @@ -8623,7 +8632,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '261.0.0', NULL) + (TRUE, NOW(), NOW(), '262.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/sled-resource-vmm-instance-state-generation/up01.sql b/schema/crdb/sled-resource-vmm-instance-state-generation/up01.sql new file mode 100644 index 00000000000..90127964822 --- /dev/null +++ b/schema/crdb/sled-resource-vmm-instance-state-generation/up01.sql @@ -0,0 +1,3 @@ +ALTER TABLE omicron.public.sled_resource_vmm + ADD COLUMN IF NOT EXISTS instance_state_generation INT + DEFAULT NULL; diff --git a/schema/crdb/sled-resource-vmm-instance-state-generation/up02.sql b/schema/crdb/sled-resource-vmm-instance-state-generation/up02.sql new file mode 100644 index 00000000000..0a78b4a8972 --- /dev/null +++ b/schema/crdb/sled-resource-vmm-instance-state-generation/up02.sql @@ -0,0 +1,4 @@ +CREATE UNIQUE INDEX IF NOT EXISTS single_vmm_reservation_per_generation ON omicron.public.sled_resource_vmm ( + instance_id, + instance_state_generation +); diff --git a/schema/crdb/sled-resource-vmm-instance-state-generation/up02.verify.sql b/schema/crdb/sled-resource-vmm-instance-state-generation/up02.verify.sql new file mode 100644 index 00000000000..b90c7e1eb74 --- /dev/null +++ b/schema/crdb/sled-resource-vmm-instance-state-generation/up02.verify.sql @@ -0,0 +1,2 @@ +-- DO NOT EDIT. Generated by test_migration_verification_files. +SELECT CAST(IF((SELECT true WHERE EXISTS (SELECT index_name FROM omicron.crdb_internal.table_indexes WHERE descriptor_name = 'sled_resource_vmm' AND index_name = 'single_vmm_reservation_per_generation')),'true','Schema change verification failed: index single_vmm_reservation_per_generation on table sled_resource_vmm does not exist') AS BOOL);