Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions hypervisor/cloudhypervisor/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cloudhypervisor
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"os"
Expand Down Expand Up @@ -112,16 +113,33 @@ func shutdownVM(ctx context.Context, hc *http.Client) error {
return err
}

// pauseVM is idempotent — swallows CH's Paused→Paused 500 so a stuck-paused VM recovers.
func pauseVM(ctx context.Context, hc *http.Client) error {
_, err := vmAPIOnce(ctx, hc, "vm.pause", nil)
if err != nil && isAlreadyInStateError(err, "Paused") {
return nil
}
return err
}

// resumeVM is idempotent — swallows CH's Running→Running 500.
func resumeVM(ctx context.Context, hc *http.Client) error {
_, err := vmAPIOnce(ctx, hc, "vm.resume", nil)
if err != nil && isAlreadyInStateError(err, "Running") {
return nil
}
return err
}

// isAlreadyInStateError matches CH's exact `Invalid transition: InvalidStateTransition(<state>, <state>)` in a 500 body.
func isAlreadyInStateError(err error, state string) bool {
var ae *utils.APIError
if !errors.As(err, &ae) || ae.Code != http.StatusInternalServerError {
return false
}
return strings.Contains(ae.Message, fmt.Sprintf("Invalid transition: InvalidStateTransition(%s, %s)", state, state))
}

// snapshotVM and restoreVM temporarily extend the client timeout for
// long-running memory transfers, then restore it for subsequent calls.
func snapshotVM(ctx context.Context, hc *http.Client, destDir string) error {
Expand Down
39 changes: 39 additions & 0 deletions hypervisor/cloudhypervisor/helper_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package cloudhypervisor

import (
"errors"
"fmt"
"net/http"
"testing"

"github.com/cocoonstack/cocoon/utils"
)

func TestIsAlreadyInStateError(t *testing.T) {
chPaused := `PUT http://localhost/api/v1/vm.pause → 500: ["Error from API","The VM could not be paused","Cannot pause VM","Failed to pause migratable component","Invalid transition: InvalidStateTransition(Paused, Paused)"]`
chRunning := `PUT http://localhost/api/v1/vm.resume → 500: ["Error from API","Cannot resume VM","Failed","Invalid transition: InvalidStateTransition(Running, Running)"]`

tests := []struct {
name string
err error
state string
want bool
}{
{name: "paused paused match", err: &utils.APIError{Code: http.StatusInternalServerError, Message: chPaused}, state: "Paused", want: true},
{name: "running running match", err: &utils.APIError{Code: http.StatusInternalServerError, Message: chRunning}, state: "Running", want: true},
{name: "wrong state in match", err: &utils.APIError{Code: http.StatusInternalServerError, Message: chPaused}, state: "Running", want: false},
{name: "non-500 code", err: &utils.APIError{Code: http.StatusBadRequest, Message: chPaused}, state: "Paused", want: false},
{name: "different transition (Created→Paused)", err: &utils.APIError{Code: http.StatusInternalServerError, Message: "InvalidStateTransition(Created, Paused)"}, state: "Paused", want: false},
{name: "non-APIError", err: errors.New("dial unix: connection refused"), state: "Paused", want: false},
{name: "nil error", err: nil, state: "Paused", want: false},
{name: "wrapped APIError", err: fmt.Errorf("snapshot save: %w", &utils.APIError{Code: http.StatusInternalServerError, Message: chPaused}), state: "Paused", want: true},
{name: "empty state", err: &utils.APIError{Code: http.StatusInternalServerError, Message: chPaused}, state: "", want: false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := isAlreadyInStateError(tt.err, tt.state); got != tt.want {
t.Errorf("isAlreadyInStateError(%v, %q) = %v, want %v", tt.err, tt.state, got, tt.want)
}
})
}
}
6 changes: 2 additions & 4 deletions hypervisor/firecracker/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,7 @@ func sendCtrlAltDel(ctx context.Context, hc *http.Client) error {
return fcAPIOnce(ctx, hc, http.MethodPut, "/actions", body)
}

// pauseVM pauses a running FC instance via PATCH /vm. Non-idempotent: a
// retry after a lost response would hit "already paused" and mask success.
// pauseVM pauses a running FC instance via PATCH /vm. Idempotent: FC's vCPU event loop acks Pause from the paused state without error (vstate/vcpu.rs).
func pauseVM(ctx context.Context, hc *http.Client) error {
body, err := json.Marshal(map[string]string{"state": vmStatePaused})
if err != nil {
Expand All @@ -179,8 +178,7 @@ func pauseVM(ctx context.Context, hc *http.Client) error {
return fcAPIOnce(ctx, hc, http.MethodPatch, "/vm", body)
}

// resumeVM resumes a paused FC instance via PATCH /vm. Same non-idempotent
// shape as pauseVM.
// resumeVM resumes a paused FC instance via PATCH /vm. Idempotent like pauseVM.
func resumeVM(ctx context.Context, hc *http.Client) error {
body, err := json.Marshal(map[string]string{"state": vmStateResumed})
if err != nil {
Expand Down
Loading