Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 54 additions & 2 deletions cmd/containerd-shim-runhcs-v1/task_hcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -931,31 +931,83 @@ func (ht *hcsTask) updateTaskContainerResources(ctx context.Context, data interf
// updateWCOWContainerCPU applies a CPU resource update to an HCS-backed WCOW
// container.
//
// Count, Maximum and Shares are carried on the HCS Processor schema and are
// sent as a single modify request, but only when at least one of them is set.
// A non-empty Affinity is then applied separately through
// updateWCOWContainerCPUAffinity, because affinity is not part of that schema.
//
// On hosts older than 20H2 no request is made and errdefs.ErrNotImplemented is
// returned. Any error from the HCS request or the affinity update is returned
// unchanged.
func (ht *hcsTask) updateWCOWContainerCPU(ctx context.Context, cpu *specs.WindowsCPUResources) error {
	// Only 20H2+ hosts accept this request directly through HCS.
	if osversion.Get().Build < osversion.V20H2 {
		return errdefs.ErrNotImplemented
	}

	// Count/Maximum/Shares live on the HCS Processor schema. Only send a modify
	// request when at least one of them is set, so an affinity-only update does
	// not push an empty (no-op) request to HCS.
	req := &hcsschema.Processor{}
	hasRateControl := false
	if cpu.Count != nil {
		procCount := int32(*cpu.Count)
		// Prefer the hosting UVM's processor count when the container runs
		// inside one; otherwise fall back to the physical host's count.
		hostProcs := processorinfo.ProcessorCount()
		if ht.host != nil {
			hostProcs = ht.host.ProcessorCount()
		}
		req.Count = hcsoci.NormalizeProcessorCount(ctx, ht.id, procCount, hostProcs)
		hasRateControl = true
	}
	if cpu.Maximum != nil {
		req.Maximum = int32(*cpu.Maximum)
		hasRateControl = true
	}
	if cpu.Shares != nil {
		req.Weight = int32(*cpu.Shares)
		hasRateControl = true
	}
	// NOTE: there must be no unconditional return before this point; the
	// affinity handling below has to stay reachable.
	if hasRateControl {
		if err := ht.requestUpdateContainer(ctx, resourcepaths.SiloProcessorResourcePath, req); err != nil {
			return err
		}
	}

	// CPU affinity is not part of the HCS Processor schema, so it has to be
	// applied out of band (the silo's job object for Argon). A no-op when unset.
	if len(cpu.Affinity) > 0 {
		return ht.updateWCOWContainerCPUAffinity(ctx, cpu.Affinity)
	}
	return nil
}

// updateWCOWContainerCPUAffinity applies a post-start change of
// spec.Windows.Resources.CPU.Affinity to an HCS-backed WCOW container.
//
// The requested entries are validated first. A validation error is returned
// as-is, and an empty validated set is treated as a no-op.
//
// Process-isolated (Argon) containers: the validated entries are converted to
// job-object affinities and handed to the container's SetCPUGroupAffinities,
// which re-pins the silo's job object.
//
// Hypervisor-isolated (Xenon) containers, identified by ht.host being set,
// would need the UVM's CPU group swapped instead. That is not implemented, so
// an error wrapping errdefs.ErrNotImplemented is returned rather than dropping
// the request silently.
func (ht *hcsTask) updateWCOWContainerCPUAffinity(ctx context.Context, affinity []specs.WindowsCPUGroupAffinity) error {
	entries, err := hcsoci.ValidateCPUAffinityEntries(affinity)
	switch {
	case err != nil:
		return err
	case len(entries) == 0:
		// Nothing to pin.
		return nil
	case ht.host != nil:
		// Xenon: UVM-level CPU-group swap is out of scope here (Track A).
		return fmt.Errorf("cpu affinity update for hypervisor-isolated containers is not supported: %w", errdefs.ErrNotImplemented)
	}

	// ht.c is used through the cow.Container interface; implementations that
	// cannot honor affinity are expected to report ErrNotImplemented themselves.
	jobAffinities := hcsoci.ToJobObjectAffinities(entries)
	return ht.c.SetCPUGroupAffinities(ctx, jobAffinities)
}

func isValidWindowsCPUResources(c *specs.WindowsCPUResources) bool {
return (c.Count != nil && (c.Shares == nil && c.Maximum == nil)) ||
// Exactly one of the mutually-exclusive rate controls (Count/Shares/Maximum).
exactlyOneRateControl := (c.Count != nil && (c.Shares == nil && c.Maximum == nil)) ||
(c.Shares != nil && (c.Count == nil && c.Maximum == nil)) ||
(c.Maximum != nil && (c.Count == nil && c.Shares == nil))
// An affinity-only update carries no rate control; accept it on its own so that
// CPU affinity can be changed after the container has started.
affinityOnly := len(c.Affinity) > 0 && c.Count == nil && c.Shares == nil && c.Maximum == nil
return exactlyOneRateControl || affinityOnly
}

func (ht *hcsTask) updateWCOWResources(ctx context.Context, resources *specs.WindowsResources, annotations map[string]string) error {
Expand Down
44 changes: 44 additions & 0 deletions cmd/containerd-shim-runhcs-v1/task_hcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ package main

import (
"context"
"errors"
"math/rand"
"reflect"
"strconv"
"testing"
"time"

"github.com/Microsoft/hcsshim/internal/uvm"
"github.com/Microsoft/hcsshim/pkg/annotations"
"github.com/containerd/errdefs"
"github.com/opencontainers/runtime-spec/specs-go"
Expand Down Expand Up @@ -506,3 +508,45 @@ func Test_handleProcessArgsForIsolatedJobContainer(t *testing.T) {
})
}
}

// u64 returns a pointer to a freshly allocated uint64 holding v.
func u64(v uint64) *uint64 {
	p := new(uint64)
	*p = v
	return p
}
// u16 returns a pointer to a freshly allocated uint16 holding v.
func u16(v uint16) *uint16 {
	p := new(uint16)
	*p = v
	return p
}

// Test_isValidWindowsCPUResources checks which combinations of Count, Shares,
// Maximum and Affinity isValidWindowsCPUResources accepts.
func Test_isValidWindowsCPUResources(t *testing.T) {
	affinity := []specs.WindowsCPUGroupAffinity{{Group: 0, Mask: 0x3}}

	type testCase struct {
		name string
		c    *specs.WindowsCPUResources
		want bool
	}
	cases := []testCase{
		{name: "count only", c: &specs.WindowsCPUResources{Count: u64(2)}, want: true},
		{name: "shares only", c: &specs.WindowsCPUResources{Shares: u16(100)}, want: true},
		{name: "maximum only", c: &specs.WindowsCPUResources{Maximum: u16(5000)}, want: true},
		{name: "count and shares", c: &specs.WindowsCPUResources{Count: u64(2), Shares: u16(100)}, want: false},
		{name: "affinity only", c: &specs.WindowsCPUResources{Affinity: affinity}, want: true},
		{name: "affinity with count", c: &specs.WindowsCPUResources{Count: u64(2), Affinity: affinity}, want: true},
		{name: "empty", c: &specs.WindowsCPUResources{}, want: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := isValidWindowsCPUResources(tc.c)
			if got != tc.want {
				t.Fatalf("isValidWindowsCPUResources(%+v) = %v, want %v", tc.c, got, tc.want)
			}
		})
	}
}

// Test_hcsTask_updateWCOWContainerCPUAffinity_NoAffinity verifies that a nil
// affinity slice is accepted as a no-op on a task that has no container set.
func Test_hcsTask_updateWCOWContainerCPUAffinity_NoAffinity(t *testing.T) {
	// Only the id is populated: the no-op path must return before it needs an
	// HCS-backed container.
	task := &hcsTask{id: t.Name()}

	err := task.updateWCOWContainerCPUAffinity(context.Background(), nil)
	if err != nil {
		t.Fatalf("expected nil error for empty affinity, got %v", err)
	}
}

func Test_hcsTask_updateWCOWContainerCPUAffinity_XenonNotImplemented(t *testing.T) {
ht := &hcsTask{id: t.Name(), host: &uvm.UtilityVM{}}
err := ht.updateWCOWContainerCPUAffinity(context.Background(), []specs.WindowsCPUGroupAffinity{{Group: 0, Mask: 0x1}})
if !errors.Is(err, errdefs.ErrNotImplemented) {
t.Fatalf("expected ErrNotImplemented for hypervisor-isolated container, got %v", err)
}
}
7 changes: 7 additions & 0 deletions internal/cow/cow.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"github.com/Microsoft/hcsshim/internal/hcs/schema1"
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
"github.com/Microsoft/hcsshim/internal/jobobject"
)

// Process is the interface for an OS process running in a container or utility VM.
Expand Down Expand Up @@ -96,4 +97,10 @@ type Container interface {
WaitError() error
// Modify sends a request to modify container resources
Modify(ctx context.Context, config interface{}) error
// SetCPUGroupAffinities pins the container's processes to the given CPU
// group affinities. It exists because CPU affinity is not part of the HCS
// container Processor schema and must be applied out of band (on the silo's
// job object for process-isolated Windows containers). Implementations that
// do not support setting CPU affinity return errdefs.ErrNotImplemented.
SetCPUGroupAffinities(ctx context.Context, affinities []jobobject.GroupAffinity) error
}
10 changes: 10 additions & 0 deletions internal/gcs/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,18 @@ package gcs
import (
"context"
"errors"
"fmt"
"sync"
"time"

"github.com/Microsoft/hcsshim/internal/cow"
"github.com/Microsoft/hcsshim/internal/gcs/prot"
"github.com/Microsoft/hcsshim/internal/hcs/schema1"
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
"github.com/Microsoft/hcsshim/internal/jobobject"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/oc"
"github.com/containerd/errdefs"
"go.opencensus.io/trace"
)

Expand Down Expand Up @@ -138,6 +141,13 @@ func (c *Container) Modify(ctx context.Context, config interface{}) (err error)
return c.gc.brdg.RPC(ctx, prot.RPCModifySettings, &req, &resp, false)
}

// SetCPUGroupAffinities satisfies cow.Container for guest-side containers.
//
// CPU affinity is applied on the host's silo job object, which a guest-side
// container does not own, so both arguments are ignored and the call always
// fails with an error wrapping errdefs.ErrNotImplemented.
func (c *Container) SetCPUGroupAffinities(_ context.Context, _ []jobobject.GroupAffinity) error {
	unsupported := fmt.Errorf("cpu affinity is not supported for guest containers: %w", errdefs.ErrNotImplemented)
	return unsupported
}

// Properties returns the requested container properties targeting a V1 schema prot.Container.
func (c *Container) Properties(ctx context.Context, types ...schema1.PropertyType) (_ *schema1.ContainerProperties, err error) {
ctx, span := oc.StartSpan(ctx, "gcs::Container::Properties", oc.WithClientSpanKind)
Expand Down
65 changes: 57 additions & 8 deletions internal/hcs/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,20 @@ func (computeSystem *System) Properties(ctx context.Context, types ...schema1.Pr
return properties, nil
}

// openSilo opens the job object backing this container's server silo, looked up
// by the name siloNameFmt derives from the compute system id
// (`\Container_<id>`), using the NT variant of the open call.
//
// HCS owns the silo, so opening it by name is the only route available to the
// shim, and that only succeeds while running as SYSTEM. The returned handle
// belongs to the caller, who must Close it.
//
// A future HCS feature may allow passing in a job object for the container to
// use, which would make this lookup unnecessary.
func (computeSystem *System) openSilo(ctx context.Context) (*jobobject.JobObject, error) {
	opts := jobobject.Options{
		Name:         siloNameFmt(computeSystem.id),
		UseNTVariant: true,
	}
	return jobobject.Open(ctx, &opts)
}

// queryInProc handles querying for container properties without reaching out to HCS. `props`
// will be updated to contain any data returned from the queries present in `types`. If any properties
// failed to be queried they will be tallied up and returned in as the first return value. Failures on
Expand All @@ -434,14 +448,7 @@ func (computeSystem *System) queryInProc(
props *hcsschema.Properties,
types []hcsschema.PropertyType,
) ([]hcsschema.PropertyType, error) {
// In the future we can make use of some new functionality in the HCS that allows you
// to pass a job object for HCS to use for the container. Currently, the only way we'll
// be able to open the job/silo is if we're running as SYSTEM.
jobOptions := &jobobject.Options{
UseNTVariant: true,
Name: siloNameFmt(computeSystem.id),
}
job, err := jobobject.Open(ctx, jobOptions)
job, err := computeSystem.openSilo(ctx)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -535,6 +542,48 @@ func (computeSystem *System) statisticsInProc(job *jobobject.JobObject) (*hcssch
}, nil
}

// SetCPUGroupAffinities pins the container's server silo to the supplied
// processor group affinities. It implements the cow.Container interface.
//
// The HCS container Processor schema has no CPU-affinity field, so for
// process-isolated (Argon) containers the affinity is written straight onto the
// silo's job object (SetInformationJobObject with JobObjectGroupInformationEx).
// HCS still owns the silo; this method only opens a short-lived handle by the
// silo's well-known job name, the same way queryInProc does, and closes it
// before returning.
//
// Intended use is after the compute system has been created and before it is
// started, so the affinity is already on the job when HCS assigns the init
// process; the kernel then enforces it on every process that joins the silo.
// Calling it on an already-running silo is also supported: the kernel
// re-applies the mask to current members.
//
// Errors:
//   - wraps ErrAlreadyClosed when the compute system handle is already closed;
//   - a plain error when the compute system is not of type "container";
//   - wraps the underlying error when opening the silo or setting the
//     affinity fails.
func (computeSystem *System) SetCPUGroupAffinities(ctx context.Context, affinities []jobobject.GroupAffinity) error {
	// Hold the read lock for the whole operation so a concurrent Close() cannot
	// tear the compute system down while its silo is being modified.
	computeSystem.handleLock.RLock()
	defer computeSystem.handleLock.RUnlock()

	switch {
	case computeSystem.handle == 0:
		// A zero handle means the compute system has already been closed.
		return fmt.Errorf("set cpu group affinities on %s silo: %w", computeSystem.ID(), ErrAlreadyClosed)
	case computeSystem.typ != "container":
		// Only containers have a silo job object; VM-based systems do not.
		return fmt.Errorf("cpu group affinities are only supported on container compute systems, got %q", computeSystem.typ)
	}

	silo, openErr := computeSystem.openSilo(ctx)
	if openErr != nil {
		return fmt.Errorf("open %s silo: %w", computeSystem.ID(), openErr)
	}
	defer silo.Close()

	if setErr := silo.SetCPUGroupAffinities(affinities); setErr != nil {
		return fmt.Errorf("set cpu group affinities on %s silo: %w", computeSystem.ID(), setErr)
	}
	return nil
}

// hcsPropertiesV2Query is a helper to make a HcsGetComputeSystemProperties call using the V2 schema property types.
func (computeSystem *System) hcsPropertiesV2Query(ctx context.Context, types []hcsschema.PropertyType) (*hcsschema.Properties, error) {
operation := "hcs::System::PropertiesV2"
Expand Down
Loading
Loading