Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 104 additions & 17 deletions illumos-utils/src/zfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@ enum MountpointError {

#[derive(thiserror::Error, Debug)]
enum EnsureDatasetErrorRaw {
#[error("Dataset not found")]
DatasetNotFound,

#[error("ZFS execution error")]
Execution(#[from] crate::ExecutionError),

Expand All @@ -164,6 +167,20 @@ enum EnsureDatasetErrorRaw {
},
}

impl From<SetValueErrorRaw> for EnsureDatasetErrorRaw {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Part of me kind of wonders if we might want to implement DatasetNotFound as a generic wrapper around any other error that's either the inner error or DatasetNotFound, and if that might reduce some boilerplate here? Something like:

#[derive(thiserror::Error)]
enum DatasetError<T> {
    #[error("Dataset not found")]
    NotFound,
    #[error(transparent)]
    Other(#[from] T)
}

although, it's equally possible that this is actually more annoying than just doing it the current way

fn from(e: SetValueErrorRaw) -> EnsureDatasetErrorRaw {
match e {
SetValueErrorRaw::DatasetNotFound => {
EnsureDatasetErrorRaw::DatasetNotFound
}

SetValueErrorRaw::Execution(err) => {
EnsureDatasetErrorRaw::Execution(err)
}
}
}
}

/// Error returned by [`Zfs::ensure_dataset`].
#[derive(thiserror::Error, Debug)]
#[error("Failed to ensure filesystem '{name}'")]
Expand All @@ -173,18 +190,30 @@ pub struct EnsureDatasetError {
err: EnsureDatasetErrorRaw,
}

/// Error returned by [`Zfs::set_oxide_value`]
#[derive(thiserror::Error, Debug)]
enum SetValueErrorRaw {
#[error("Dataset not found")]
DatasetNotFound,

#[error(transparent)]
Execution(#[from] crate::ExecutionError),
}

/// Error returned by [`Zfs::set_oxide_value`] or [`Zfs::set_value`]
#[derive(thiserror::Error, Debug, SlogInlineError)]
#[error("Failed to set values '{values}' on filesystem {filesystem}")]
pub struct SetValueError {
filesystem: String,
values: String,
#[source]
err: crate::ExecutionError,
err: SetValueErrorRaw,
}

#[derive(thiserror::Error, Debug)]
enum GetValueErrorRaw {
#[error("Dataset not found")]
DatasetNotFound,

#[error(transparent)]
Execution(#[from] crate::ExecutionError),

Expand Down Expand Up @@ -1172,9 +1201,9 @@ pub struct DatasetVolumeDeleteArgs<'a> {
#[derive(thiserror::Error, Debug)]
#[error("Failed to remove reservation from '{name}': {err}")]
pub struct RemoveReservationError {
name: String,
pub name: String,
#[source]
err: RemoveReservationErrorInner,
pub err: RemoveReservationErrorInner,
}

impl RemoveReservationError {
Expand All @@ -1191,10 +1220,20 @@ impl RemoveReservationError {
err: RemoveReservationErrorInner::SetValue(err),
}
}

pub fn dataset_not_found(name: String) -> Self {
RemoveReservationError {
name,
err: RemoveReservationErrorInner::DatasetNotFound,
}
}
}

#[derive(thiserror::Error, Debug)]
pub enum RemoveReservationErrorInner {
#[error("Dataset not found")]
DatasetNotFound,

#[error(transparent)]
GetValue(#[from] GetValueError),

Expand Down Expand Up @@ -1575,14 +1614,33 @@ impl Zfs {
cmd.arg(format!("{name}={value}"));
}
cmd.arg(filesystem_name);
execute_async(cmd).await.map_err(|err| SetValueError {
filesystem: filesystem_name.to_string(),
values: name_values
.iter()
.map(|(k, v)| format!("{k}={v}"))
.join(","),
err,

execute_async(cmd).await.map_err(|err| {
let values =
name_values.iter().map(|(k, v)| format!("{k}={v}")).join(",");

match err {
crate::ExecutionError::CommandFailure(info)
if info.stderr.contains("does not exist") =>
{
Comment on lines +1623 to +1625
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i kinda wonder if we ought to have an execute_async wrapper for zfs commands, or something, that does this detection and returns either an ExecutionError or a DatasetNotFound, rather than forcing every caller to detect it individually? the caller-side detection seems sketchier to me, because now you have to remember to check for this contents in info.stderr every time you call a zfs command. meanwhile, having the function that runs the command do it gives the caller an enum which they are forced to handle.

take it or leave it, just a thought.

SetValueError {
filesystem: filesystem_name.to_string(),
values,
err: SetValueErrorRaw::DatasetNotFound,
}
}

_ => SetValueError {
filesystem: filesystem_name.to_string(),
values: name_values
.iter()
.map(|(k, v)| format!("{k}={v}"))
.join(","),
Comment on lines +1635 to +1638
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we are now doing this twice, btw --- you have a

let values =
    name_values.iter().map(|(k, v)| format!("{k}={v}")).join(",");

at the top of the map_err closure...

Suggested change
values: name_values
.iter()
.map(|(k, v)| format!("{k}={v}"))
.join(","),
values,

err: SetValueErrorRaw::Execution(err),
},
}
})?;

Ok(())
}

Expand Down Expand Up @@ -1752,10 +1810,22 @@ impl Zfs {
cmd.arg(&all_names);
cmd.arg(filesystem_name);
let output =
execute_async(&mut cmd).await.map_err(|err| GetValueError {
filesystem: filesystem_name.to_string(),
name: format!("{:?}", names),
err: err.into(),
execute_async(&mut cmd).await.map_err(|err| match err {
crate::ExecutionError::CommandFailure(info)
if info.stderr.contains("does not exist") =>
{
GetValueError {
filesystem: filesystem_name.to_string(),
name: format!("{:?}", names),
err: GetValueErrorRaw::DatasetNotFound,
}
}

_ => GetValueError {
filesystem: filesystem_name.to_string(),
name: format!("{:?}", names),
err: err.into(),
},
})?;
let stdout = String::from_utf8_lossy(&output.stdout);
let values = stdout.trim();
Expand Down Expand Up @@ -2243,12 +2313,29 @@ impl Zfs {
name: &str,
) -> Result<(), RemoveReservationError> {
let value = Zfs::get_value(name, "reservation").await.map_err(|e| {
RemoveReservationError::get_value(name.to_string(), e)
let GetValueError { filesystem: _, name: _, err } = &e;

match err {
GetValueErrorRaw::DatasetNotFound => {
RemoveReservationError::dataset_not_found(name.to_string())
}

_ => RemoveReservationError::get_value(name.to_string(), e),
}
})?;

if value != "none" {
Zfs::set_value(name, "reservation", "none").await.map_err(|e| {
RemoveReservationError::set_value(name.to_string(), e)
let SetValueError { filesystem: _, values: _, err } = &e;
match err {
SetValueErrorRaw::DatasetNotFound => {
RemoveReservationError::dataset_not_found(
name.to_string(),
)
}

_ => RemoveReservationError::set_value(name.to_string(), e),
}
})?;
}

Expand Down
26 changes: 23 additions & 3 deletions sled-agent/src/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ use illumos_utils::zfs::DatasetVolumeDeleteArgs;
use illumos_utils::zfs::DatasetVolumeEnsureArgs;
use illumos_utils::zfs::DestroyDatasetErrorVariant;
use illumos_utils::zfs::Mountpoint;
use illumos_utils::zfs::RemoveReservationError;
use illumos_utils::zfs::RemoveReservationErrorInner;
use illumos_utils::zfs::SizeDetails;
use illumos_utils::zfs::Zfs;
use illumos_utils::zpool::PathInPool;
Expand Down Expand Up @@ -1541,9 +1543,27 @@ impl SledAgent {
// the volume, remove the reservation set for the parent dataset if one
// exists.

Zfs::remove_reservation(&delegated_zvol.parent_dataset_name())
.await
.map_err(|e| HttpError::for_internal_error(e.to_string()))?;
let result =
Zfs::remove_reservation(&delegated_zvol.parent_dataset_name())
.await;

if let Err(e) = result {
let RemoveReservationError { name: _, err } = &e;
match err {
RemoveReservationErrorInner::DatasetNotFound => {
// If the parent dataset is no longer found, then a
// concurrent deletion occurred. Return Ok
return Ok(());
}
Comment on lines +1553 to +1557
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hm, is this really the only place we might encounter a dataset that was already removed by a concurrent deletion? should we also handle this error at the point where we actually delete the child volume dataset, too?


_ => {
// Anything else is a 500
return Err(HttpError::for_internal_error(
InlineErrorChain::new(&e).to_string(),
));
}
}
}

// Then proceed with deleting the child volume dataset, then the parent
// dataset
Expand Down
Loading