From b5fedaaf9a2b7bbd330aeffa822f7ecff0ef27b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20G=C3=BCttler?= Date: Mon, 16 Mar 2026 11:31:50 +0100 Subject: [PATCH 1/7] :seedling: Robot: refresh stale cache for young nodes and warn on repeated misses Robot API calls get cached. This can lead to wrong responses from ccm, when a bm machine was just created or renamed. For these cases contact Robot API and update the cache. But only do that for young nodes. --- hcloud/instances.go | 30 +++++- hcloud/instances_test.go | 144 ++++++++++++++++++++++++++ hcloud/util.go | 53 +++++++++- internal/robot/client/cache/client.go | 57 +++++----- 4 files changed, 248 insertions(+), 36 deletions(-) diff --git a/hcloud/instances.go b/hcloud/instances.go index 4a943a4e3..8dae844b9 100644 --- a/hcloud/instances.go +++ b/hcloud/instances.go @@ -19,6 +19,7 @@ package hcloud import ( "context" "fmt" + "sync" "github.com/hetznercloud/hcloud-go/v2/hcloud" "github.com/syself/hetzner-cloud-controller-manager/internal/legacydatacenter" @@ -44,12 +45,21 @@ type instances struct { robotClient robotclient.Client addressFamily addressFamily networkID int64 + + robotMissMu sync.Mutex + robotMissByName map[string]int } var errServerNotFound = fmt.Errorf("server not found") func newInstances(client *hcloud.Client, robotClient robotclient.Client, addressFamily addressFamily, networkID int64) *instances { - return &instances{client, robotClient, addressFamily, networkID} + return &instances{ + client: client, + robotClient: robotClient, + addressFamily: addressFamily, + networkID: networkID, + robotMissByName: make(map[string]int), + } } // lookupServer attempts to locate the corresponding hcloud.Server or models.Server (robot server) for a given v1.Node. @@ -95,11 +105,29 @@ func (i *instances) lookupServer( if err != nil { return nil, nil, false, fmt.Errorf("failed to get robot server %q: %w", string(node.Name), err) } + i.trackRobotServerMiss(node, bmServer) } } return hcloudServer, bmServer, isHCloudServer, nil } +func (i *instances) trackRobotServerMiss(node *corev1.Node, bmServer *models.Server) { + if node == nil || node.Name == "" { + return + } + + i.robotMissMu.Lock() + defer i.robotMissMu.Unlock() + + if bmServer != nil || !isYoungNode(node) { + delete(i.robotMissByName, string(node.Name)) + return + } + + i.robotMissByName[string(node.Name)]++ + logRepeatedYoungNodeRobotMiss(string(node.Name), i.robotMissByName[string(node.Name)]) +} + func (i *instances) InstanceExists(ctx context.Context, node *corev1.Node) (bool, error) { const op = "hcloud/instancesv2.InstanceExists" metrics.OperationCalled.WithLabelValues(op).Inc() diff --git a/hcloud/instances_test.go b/hcloud/instances_test.go index 21c2f56bf..87b56c702 100644 --- a/hcloud/instances_test.go +++ b/hcloud/instances_test.go @@ -17,19 +17,24 @@ limitations under the License. package hcloud import ( + "bytes" "context" "encoding/json" "net" "net/http" "reflect" + "strings" "testing" + "time" "github.com/hetznercloud/hcloud-go/v2/hcloud" "github.com/hetznercloud/hcloud-go/v2/hcloud/schema" + "github.com/syself/hetzner-cloud-controller-manager/internal/robot/client/cache" "github.com/syself/hrobot-go/models" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" cloudprovider "k8s.io/cloud-provider" + "k8s.io/klog/v2" ) // TestInstances_InstanceExists also tests [lookupServer]. The other tests @@ -173,6 +178,145 @@ func TestInstances_InstanceExists(t *testing.T) { } } +func TestInstances_InstanceExistsRobotServerCreatedAfterCacheFill(t *testing.T) { + env := newTestEnv() + defer env.Teardown() + + resetEnv := Setenv(t, + "ROBOT_USER_NAME", "user", + "ROBOT_PASSWORD", "pass", + "CACHE_TIMEOUT", "1h", + ) + defer resetEnv() + + servers := []models.Server{ + { + ServerIP: "123.123.123.123", + ServerIPv6Net: "2a01:f48:111:4221::", + ServerNumber: 321, + Name: "bm-existing", + }, + } + env.Mux.HandleFunc("/robot/server", func(w http.ResponseWriter, _ *http.Request) { + responses := make([]models.ServerResponse, 0, len(servers)) + for _, server := range servers { + responses = append(responses, models.ServerResponse{Server: server}) + } + json.NewEncoder(w).Encode(responses) + }) + + robotClient, err := cache.NewCachedRobotClient(t.TempDir(), env.Server.Client(), env.Server.URL+"/robot") + if err != nil { + t.Fatalf("Unexpected error creating cached robot client: %v", err) + } + + instances := newInstances(env.Client, robotClient, AddressFamilyIPv4, 0) + creationTime := metav1.NewTime(time.Now()) + + // Warm the cache while bm-new does not exist yet. + exists, err := instances.InstanceExists(context.TODO(), &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bm-existing", + CreationTimestamp: creationTime, + }, + }) + if err != nil { + t.Fatalf("Unexpected error warming cache: %v", err) + } + if !exists { + t.Fatal("Expected bm-existing to exist") + } + + servers = append(servers, models.Server{ + ServerIP: "123.123.123.124", + ServerIPv6Net: "2a01:f48:111:4222::", + ServerNumber: 322, + Name: "bm-new", + }) + + exists, err = instances.InstanceExists(context.TODO(), &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bm-new", + CreationTimestamp: creationTime, + }, + }) + if err != nil { + t.Fatalf("Unexpected error for bm-new: %v", err) + } + if !exists { + t.Fatal("Expected bm-new to exist after it was created") + } +} + +func TestInstances_InstanceExistsRobotServerLogsSecondYoungNodeMiss(t *testing.T) { + env := newTestEnv() + defer env.Teardown() + + resetEnv := Setenv(t, + "ROBOT_USER_NAME", "user", + "ROBOT_PASSWORD", "pass", + "CACHE_TIMEOUT", "1h", + ) + defer resetEnv() + + env.Mux.HandleFunc("/robot/server", func(w http.ResponseWriter, _ *http.Request) { + json.NewEncoder(w).Encode([]models.ServerResponse{ + { + Server: models.Server{ + ServerIP: "123.123.123.123", + ServerIPv6Net: "2a01:f48:111:4221::", + ServerNumber: 321, + Name: "bm-existing", + }, + }, + }) + }) + + robotClient, err := cache.NewCachedRobotClient(t.TempDir(), env.Server.Client(), env.Server.URL+"/robot") + if err != nil { + t.Fatalf("Unexpected error creating cached robot client: %v", err) + } + + instances := newInstances(env.Client, robotClient, AddressFamilyIPv4, 0) + node := &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bm-new", + CreationTimestamp: metav1.NewTime(time.Now()), + }, + } + + state := klog.CaptureState() + defer state.Restore() + + var logs bytes.Buffer + klog.LogToStderr(false) + klog.SetOutput(&logs) + + exists, err := instances.InstanceExists(context.TODO(), node) + if err != nil { + t.Fatalf("Unexpected error on first miss: %v", err) + } + if exists { + t.Fatal("Expected bm-new to be missing on first lookup") + } + klog.Flush() + if strings.Contains(logs.String(), "still missing in robot") { + t.Fatal("Did not expect warning log on first miss") + } + + exists, err = instances.InstanceExists(context.TODO(), node) + if err != nil { + t.Fatalf("Unexpected error on second miss: %v", err) + } + if exists { + t.Fatal("Expected bm-new to be missing on second lookup") + } + klog.Flush() + if !strings.Contains(logs.String(), `young node "bm-new" still missing in robot after 2 lookup misses`) { + t.Fatalf("Expected warning log after second miss, got %q", logs.String()) + } +} + func TestInstances_InstanceShutdown(t *testing.T) { env := newTestEnv() defer env.Teardown() diff --git a/hcloud/util.go b/hcloud/util.go index e031ca6dc..59f22a844 100644 --- a/hcloud/util.go +++ b/hcloud/util.go @@ -21,6 +21,7 @@ import ( "fmt" "regexp" "strings" + "time" "github.com/hetznercloud/hcloud-go/v2/hcloud" "github.com/syself/hetzner-cloud-controller-manager/internal/hcops" @@ -28,8 +29,15 @@ import ( robotclient "github.com/syself/hetzner-cloud-controller-manager/internal/robot/client" "github.com/syself/hrobot-go/models" corev1 "k8s.io/api/core/v1" + "k8s.io/klog/v2" ) +var youngRobotServerLookupWindow = 10 * time.Minute + +type robotServerListFreshClient interface { + ServerGetListFresh() ([]models.Server, error) +} + func getHCloudServerByName(ctx context.Context, c *hcloud.Client, name string) (*hcloud.Server, error) { const op = "hcloud/getServerByName" metrics.OperationCalled.WithLabelValues(op).Inc() @@ -71,13 +79,23 @@ func getRobotServerByName(c robotclient.Client, node *corev1.Node) (server *mode return nil, fmt.Errorf("%s: %w", op, err) } - for i, s := range serverList { - if s.Name == node.Name { - server = &serverList[i] - } + server = findRobotServerByName(serverList, string(node.Name)) + if server != nil || !isYoungNode(node) { + return server, nil } - return server, nil + freshClient, ok := c.(robotServerListFreshClient) + if !ok { + return nil, nil + } + + serverList, err = freshClient.ServerGetListFresh() + if err != nil { + hcops.HandleRateLimitExceededError(err, node) + return nil, fmt.Errorf("%s: refresh for young node: %w", op, err) + } + + return findRobotServerByName(serverList, string(node.Name)), nil } func getRobotServerByID(c robotclient.Client, id int, node *corev1.Node) (s *models.Server, e error) { @@ -116,6 +134,31 @@ func getRobotServerByID(c robotclient.Client, id int, node *corev1.Node) (s *mod return server, nil } +func findRobotServerByName(serverList []models.Server, name string) *models.Server { + for i, s := range serverList { + if s.Name == name { + return &serverList[i] + } + } + return nil +} + +func isYoungNode(node *corev1.Node) bool { + if node == nil || node.CreationTimestamp.IsZero() { + return false + } + + return time.Since(node.CreationTimestamp.Time) <= youngRobotServerLookupWindow +} + +func logRepeatedYoungNodeRobotMiss(nodeName string, missCount int) { + if missCount != 2 { + return + } + + klog.Warningf("young node %q still missing in robot after %d lookup misses", nodeName, missCount) +} + func isHCloudServerByName(name string) bool { return !strings.HasPrefix(name, hostNamePrefixRobot) } diff --git a/internal/robot/client/cache/client.go b/internal/robot/client/cache/client.go index 29d02b961..12a67cffa 100644 --- a/internal/robot/client/cache/client.go +++ b/internal/robot/client/cache/client.go @@ -90,22 +90,9 @@ func NewCachedRobotClient(rootDir string, httpClient *http.Client, baseURL strin func (c *cacheRobotClient) ServerGet(id int) (*models.Server, error) { if c.shouldSync() { - list, err := c.robotClient.ServerGetList() - if err != nil { + if _, err := c.sync(); err != nil { return nil, err } - - // populate list - c.l = list - - // remove all entries from map and populate it freshly - c.m = make(map[int]*models.Server) - for i, server := range list { - c.m[server.ServerNumber] = &list[i] - } - - // set time of last update - c.lastUpdate = time.Now() } server, found := c.m[id] @@ -119,27 +106,16 @@ func (c *cacheRobotClient) ServerGet(id int) (*models.Server, error) { func (c *cacheRobotClient) ServerGetList() ([]models.Server, error) { if c.shouldSync() { - list, err := c.robotClient.ServerGetList() - if err != nil { - return list, err - } - - // populate list - c.l = list - - // remove all entries from map and populate it freshly - c.m = make(map[int]*models.Server) - for i, server := range list { - c.m[server.ServerNumber] = &list[i] - } - - // set time of last update - c.lastUpdate = time.Now() + return c.sync() } return c.l, nil } +func (c *cacheRobotClient) ServerGetListFresh() ([]models.Server, error) { + return c.sync() +} + func (c *cacheRobotClient) shouldSync() bool { // map is nil means we have no cached value yet if c.m == nil { @@ -161,3 +137,24 @@ func (c *cacheRobotClient) SetCredentials(username, password string) error { c.m = nil return nil } + +func (c *cacheRobotClient) sync() ([]models.Server, error) { + list, err := c.robotClient.ServerGetList() + if err != nil { + return list, err + } + + // populate list + c.l = list + + // remove all entries from map and populate it freshly + c.m = make(map[int]*models.Server) + for i, server := range list { + c.m[server.ServerNumber] = &list[i] + } + + // set time of last update + c.lastUpdate = time.Now() + + return c.l, nil +} From 2e18eb7dd8209203727fd89b6e575f4799aa7852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20G=C3=BCttler?= Date: Mon, 16 Mar 2026 11:39:05 +0100 Subject: [PATCH 2/7] fix(robot): refresh stale cache for young nodes and warn on repeated misses --- hcloud/instances.go | 13 +++++++------ hcloud/instances_test.go | 3 +++ hcloud/util.go | 5 +++-- internal/robot/client/cache/client.go | 3 ++- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/hcloud/instances.go b/hcloud/instances.go index 8dae844b9..1719fec40 100644 --- a/hcloud/instances.go +++ b/hcloud/instances.go @@ -46,18 +46,19 @@ type instances struct { addressFamily addressFamily networkID int64 - robotMissMu sync.Mutex - robotMissByName map[string]int + robotMissMu sync.Mutex + // robotMissByName counts repeated misses for young bare-metal nodes by name. + robotMissByName map[string]int } var errServerNotFound = fmt.Errorf("server not found") func newInstances(client *hcloud.Client, robotClient robotclient.Client, addressFamily addressFamily, networkID int64) *instances { return &instances{ - client: client, - robotClient: robotClient, - addressFamily: addressFamily, - networkID: networkID, + client: client, + robotClient: robotClient, + addressFamily: addressFamily, + networkID: networkID, robotMissByName: make(map[string]int), } } diff --git a/hcloud/instances_test.go b/hcloud/instances_test.go index 87b56c702..de4e6a9ca 100644 --- a/hcloud/instances_test.go +++ b/hcloud/instances_test.go @@ -189,6 +189,7 @@ func TestInstances_InstanceExistsRobotServerCreatedAfterCacheFill(t *testing.T) ) defer resetEnv() + // servers backs the Robot list response and is mutated during the test. servers := []models.Server{ { ServerIP: "123.123.123.123", @@ -211,6 +212,7 @@ func TestInstances_InstanceExistsRobotServerCreatedAfterCacheFill(t *testing.T) } instances := newInstances(env.Client, robotClient, AddressFamilyIPv4, 0) + // creationTime keeps the test nodes inside the young-node refresh window. creationTime := metav1.NewTime(time.Now()) // Warm the cache while bm-new does not exist yet. @@ -288,6 +290,7 @@ func TestInstances_InstanceExistsRobotServerLogsSecondYoungNodeMiss(t *testing.T state := klog.CaptureState() defer state.Restore() + // logs captures klog output so the warning can be asserted directly. var logs bytes.Buffer klog.LogToStderr(false) klog.SetOutput(&logs) diff --git a/hcloud/util.go b/hcloud/util.go index 59f22a844..dd5f41145 100644 --- a/hcloud/util.go +++ b/hcloud/util.go @@ -32,10 +32,11 @@ import ( "k8s.io/klog/v2" ) +// youngRobotServerLookupWindow limits forced Robot refreshes to newly created nodes. var youngRobotServerLookupWindow = 10 * time.Minute type robotServerListFreshClient interface { - ServerGetListFresh() ([]models.Server, error) + ServerGetListForceRefresh() ([]models.Server, error) } func getHCloudServerByName(ctx context.Context, c *hcloud.Client, name string) (*hcloud.Server, error) { @@ -89,7 +90,7 @@ func getRobotServerByName(c robotclient.Client, node *corev1.Node) (server *mode return nil, nil } - serverList, err = freshClient.ServerGetListFresh() + serverList, err = freshClient.ServerGetListForceRefresh() if err != nil { hcops.HandleRateLimitExceededError(err, node) return nil, fmt.Errorf("%s: refresh for young node: %w", op, err) diff --git a/internal/robot/client/cache/client.go b/internal/robot/client/cache/client.go index 12a67cffa..d372b0b6b 100644 --- a/internal/robot/client/cache/client.go +++ b/internal/robot/client/cache/client.go @@ -112,7 +112,8 @@ func (c *cacheRobotClient) ServerGetList() ([]models.Server, error) { return c.l, nil } -func (c *cacheRobotClient) ServerGetListFresh() ([]models.Server, error) { +// ServerGetListForceRefresh bypasses the timeout check and reloads the cache from Robot. +func (c *cacheRobotClient) ServerGetListForceRefresh() ([]models.Server, error) { return c.sync() } From d3d3f6a064f6c85aea06eb1c9a665ece44c79474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20G=C3=BCttler?= Date: Mon, 16 Mar 2026 11:54:44 +0100 Subject: [PATCH 3/7] docs. --- hcloud/instances.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hcloud/instances.go b/hcloud/instances.go index 1719fec40..ee37f9745 100644 --- a/hcloud/instances.go +++ b/hcloud/instances.go @@ -112,6 +112,8 @@ func (i *instances) lookupServer( return hcloudServer, bmServer, isHCloudServer, nil } +// trackRobotServerMiss remembers repeated misses for young bare-metal nodes and +// emits a warning on the second miss to surface unexpected stale-cache behavior. func (i *instances) trackRobotServerMiss(node *corev1.Node, bmServer *models.Server) { if node == nil || node.Name == "" { return From 0507beba066409f452b6dac13548637e1f201950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20G=C3=BCttler?= Date: Mon, 16 Mar 2026 12:23:31 +0100 Subject: [PATCH 4/7] test: fix lint warning in robot cache regression test --- hcloud/instances_test.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/hcloud/instances_test.go b/hcloud/instances_test.go index de4e6a9ca..c6089e943 100644 --- a/hcloud/instances_test.go +++ b/hcloud/instances_test.go @@ -190,14 +190,13 @@ func TestInstances_InstanceExistsRobotServerCreatedAfterCacheFill(t *testing.T) defer resetEnv() // servers backs the Robot list response and is mutated during the test. - servers := []models.Server{ - { - ServerIP: "123.123.123.123", - ServerIPv6Net: "2a01:f48:111:4221::", - ServerNumber: 321, - Name: "bm-existing", - }, - } + servers := make([]models.Server, 0, 2) + servers = append(servers, models.Server{ + ServerIP: "123.123.123.123", + ServerIPv6Net: "2a01:f48:111:4221::", + ServerNumber: 321, + Name: "bm-existing", + }) env.Mux.HandleFunc("/robot/server", func(w http.ResponseWriter, _ *http.Request) { responses := make([]models.ServerResponse, 0, len(servers)) for _, server := range servers { From 4460b26e6e2f4a03d50ce0f5faa81145d5994fc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20G=C3=BCttler?= Date: Mon, 16 Mar 2026 12:24:38 +0100 Subject: [PATCH 5/7] refactor(robot): align force-refresh naming --- hcloud/util.go | 6 +++--- internal/robot/client/cache/client.go | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hcloud/util.go b/hcloud/util.go index dd5f41145..eb526e7c4 100644 --- a/hcloud/util.go +++ b/hcloud/util.go @@ -35,7 +35,7 @@ import ( // youngRobotServerLookupWindow limits forced Robot refreshes to newly created nodes. var youngRobotServerLookupWindow = 10 * time.Minute -type robotServerListFreshClient interface { +type robotServerListForceRefreshClient interface { ServerGetListForceRefresh() ([]models.Server, error) } @@ -85,12 +85,12 @@ func getRobotServerByName(c robotclient.Client, node *corev1.Node) (server *mode return server, nil } - freshClient, ok := c.(robotServerListFreshClient) + forceRefreshClient, ok := c.(robotServerListForceRefreshClient) if !ok { return nil, nil } - serverList, err = freshClient.ServerGetListForceRefresh() + serverList, err = forceRefreshClient.ServerGetListForceRefresh() if err != nil { hcops.HandleRateLimitExceededError(err, node) return nil, fmt.Errorf("%s: refresh for young node: %w", op, err) diff --git a/internal/robot/client/cache/client.go b/internal/robot/client/cache/client.go index d372b0b6b..481f83293 100644 --- a/internal/robot/client/cache/client.go +++ b/internal/robot/client/cache/client.go @@ -148,7 +148,7 @@ func (c *cacheRobotClient) sync() ([]models.Server, error) { // populate list c.l = list - // remove all entries from map and populate it freshly + // remove all entries from map and repopulate it from the current list c.m = make(map[int]*models.Server) for i, server := range list { c.m[server.ServerNumber] = &list[i] From 586c9ca714dbf1cd39d43a31fb211de2c6bbd6a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20G=C3=BCttler?= Date: Mon, 16 Mar 2026 12:26:45 +0100 Subject: [PATCH 6/7] ... --- hcloud/cloud_test.go | 13 +++++++++++-- hcloud/util.go | 11 +---------- internal/mocks/robot.go | 4 ++++ internal/robot/client/interface.go | 1 + 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/hcloud/cloud_test.go b/hcloud/cloud_test.go index f0ff06969..d092c8d90 100644 --- a/hcloud/cloud_test.go +++ b/hcloud/cloud_test.go @@ -34,6 +34,7 @@ import ( "github.com/syself/hetzner-cloud-controller-manager/internal/annotation" "github.com/syself/hetzner-cloud-controller-manager/internal/credentials" "github.com/syself/hetzner-cloud-controller-manager/internal/hcops" + robotclient "github.com/syself/hetzner-cloud-controller-manager/internal/robot/client" hrobot "github.com/syself/hrobot-go" "github.com/syself/hrobot-go/models" corev1 "k8s.io/api/core/v1" @@ -44,7 +45,15 @@ type testEnv struct { Server *httptest.Server Mux *http.ServeMux Client *hcloud.Client - RobotClient hrobot.RobotClient + RobotClient robotclient.Client +} + +type testRobotClient struct { + hrobot.RobotClient +} + +func (c testRobotClient) ServerGetListForceRefresh() ([]models.Server, error) { + return c.ServerGetList() } func (env *testEnv) Teardown() { @@ -70,7 +79,7 @@ func newTestEnv() testEnv { Server: server, Mux: mux, Client: client, - RobotClient: robotClient, + RobotClient: testRobotClient{RobotClient: robotClient}, } } diff --git a/hcloud/util.go b/hcloud/util.go index eb526e7c4..99c7b4689 100644 --- a/hcloud/util.go +++ b/hcloud/util.go @@ -35,10 +35,6 @@ import ( // youngRobotServerLookupWindow limits forced Robot refreshes to newly created nodes. var youngRobotServerLookupWindow = 10 * time.Minute -type robotServerListForceRefreshClient interface { - ServerGetListForceRefresh() ([]models.Server, error) -} - func getHCloudServerByName(ctx context.Context, c *hcloud.Client, name string) (*hcloud.Server, error) { const op = "hcloud/getServerByName" metrics.OperationCalled.WithLabelValues(op).Inc() @@ -85,12 +81,7 @@ func getRobotServerByName(c robotclient.Client, node *corev1.Node) (server *mode return server, nil } - forceRefreshClient, ok := c.(robotServerListForceRefreshClient) - if !ok { - return nil, nil - } - - serverList, err = forceRefreshClient.ServerGetListForceRefresh() + serverList, err = c.ServerGetListForceRefresh() if err != nil { hcops.HandleRateLimitExceededError(err, node) return nil, fmt.Errorf("%s: refresh for young node: %w", op, err) diff --git a/internal/mocks/robot.go b/internal/mocks/robot.go index f2b1783e2..9b7dafef5 100644 --- a/internal/mocks/robot.go +++ b/internal/mocks/robot.go @@ -14,6 +14,10 @@ func (m *RobotClient) ServerGetList() ([]models.Server, error) { return getRobotServers(args, 0), args.Error(1) } +func (m *RobotClient) ServerGetListForceRefresh() ([]models.Server, error) { + return m.ServerGetList() +} + func (m *RobotClient) SetCredentials(_, _ string) error { args := m.Called() return args.Error(3) diff --git a/internal/robot/client/interface.go b/internal/robot/client/interface.go index 06ecc9bf3..ee086f8ba 100644 --- a/internal/robot/client/interface.go +++ b/internal/robot/client/interface.go @@ -5,5 +5,6 @@ import "github.com/syself/hrobot-go/models" type Client interface { ServerGet(id int) (*models.Server, error) ServerGetList() ([]models.Server, error) + ServerGetListForceRefresh() ([]models.Server, error) SetCredentials(username, password string) error } From cf1c07d864c4b694a3d8d18f40bf97d088c9df3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20G=C3=BCttler?= Date: Mon, 16 Mar 2026 12:51:52 +0100 Subject: [PATCH 7/7] misc. --- hcloud/instances.go | 10 ++++++---- hcloud/util.go | 9 --------- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/hcloud/instances.go b/hcloud/instances.go index ee37f9745..6b95bb043 100644 --- a/hcloud/instances.go +++ b/hcloud/instances.go @@ -106,15 +106,15 @@ func (i *instances) lookupServer( if err != nil { return nil, nil, false, fmt.Errorf("failed to get robot server %q: %w", string(node.Name), err) } - i.trackRobotServerMiss(node, bmServer) + i.trackRobotServerByNameMiss(node, bmServer) } } return hcloudServer, bmServer, isHCloudServer, nil } -// trackRobotServerMiss remembers repeated misses for young bare-metal nodes and +// trackRobotServerByNameMiss remembers repeated misses for young bare-metal nodes and // emits a warning on the second miss to surface unexpected stale-cache behavior. -func (i *instances) trackRobotServerMiss(node *corev1.Node, bmServer *models.Server) { +func (i *instances) trackRobotServerByNameMiss(node *corev1.Node, bmServer *models.Server) { if node == nil || node.Name == "" { return } @@ -128,7 +128,9 @@ func (i *instances) trackRobotServerMiss(node *corev1.Node, bmServer *models.Ser } i.robotMissByName[string(node.Name)]++ - logRepeatedYoungNodeRobotMiss(string(node.Name), i.robotMissByName[string(node.Name)]) + if i.robotMissByName[string(node.Name)] == 2 { + klog.Warningf("young node %q still missing in robot after %d lookup misses", node.Name, i.robotMissByName[string(node.Name)]) + } } func (i *instances) InstanceExists(ctx context.Context, node *corev1.Node) (bool, error) { diff --git a/hcloud/util.go b/hcloud/util.go index 99c7b4689..7eadc2bc4 100644 --- a/hcloud/util.go +++ b/hcloud/util.go @@ -29,7 +29,6 @@ import ( robotclient "github.com/syself/hetzner-cloud-controller-manager/internal/robot/client" "github.com/syself/hrobot-go/models" corev1 "k8s.io/api/core/v1" - "k8s.io/klog/v2" ) // youngRobotServerLookupWindow limits forced Robot refreshes to newly created nodes. @@ -143,14 +142,6 @@ func isYoungNode(node *corev1.Node) bool { return time.Since(node.CreationTimestamp.Time) <= youngRobotServerLookupWindow } -func logRepeatedYoungNodeRobotMiss(nodeName string, missCount int) { - if missCount != 2 { - return - } - - klog.Warningf("young node %q still missing in robot after %d lookup misses", nodeName, missCount) -} - func isHCloudServerByName(name string) bool { return !strings.HasPrefix(name, hostNamePrefixRobot) }