Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 37 additions & 3 deletions cmk/plugins/hp_proliant/agent_based/hp_proliant_da_cntlr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# conditions defined in the file COPYING, which is part of this source code package.
from collections.abc import Mapping
from enum import StrEnum
from typing import assert_never, NamedTuple, Self
from typing import assert_never, NamedTuple, Self, TypedDict

from cmk.agent_based.v2 import (
CheckPlugin,
Expand Down Expand Up @@ -182,6 +182,29 @@ def from_line(cls, line: list[str]) -> Self | None:
)


class Params(TypedDict):
    """Check parameters: the monitoring state used for each "other" reading.

    Every value is the integer form of a monitoring state; the check function
    converts it back with ``State(...)`` before using it.
    """

    # State to report when the controller condition is "other".
    condition_other_state: int
    # State to report when the board condition is "other".
    board_condition_other_state: int
    # State to report when the board status is "other".
    board_status_other_state: int


# By default every "other" reading keeps mapping to WARN, which is the historic
# behaviour of this check. The values can be overridden through the check
# ruleset because HPE ProLiant Gen11 / iLO 6 firmware tends to report the board
# condition as "other" on perfectly healthy controllers, which would leave the
# service in WARN forever; each "other" value can be remapped independently.
DEFAULT_PARAMETERS: Params = Params(
    condition_other_state=State.WARN.value,
    board_condition_other_state=State.WARN.value,
    board_status_other_state=State.WARN.value,
)


def _monitoring_state(value: SNMPCondition | SNMPState, other_state: State) -> State:
    """Translate one SNMP reading into a monitoring state.

    Args:
        value: The condition or status reported for the controller.
        other_state: The state to use when ``value`` is the "other" member.

    Returns:
        ``other_state`` if ``value`` is ``SNMPCondition.OTHER`` or
        ``SNMPState.OTHER``; otherwise whatever ``value.to_state()`` returns.
    """
    # Only the "other" member is remappable; every other reading keeps the
    # state its own enum assigns to it.
    if value in (SNMPCondition.OTHER, SNMPState.OTHER):
        return other_state
    return value.to_state()


def parse_hp_proliant_da_cntlr(string_table: StringTable) -> ParsedSection:
    """Index the SNMP rows by their first column.

    Each row is passed to ``ControllerData.from_line`` and stored under the
    row's first column. When several rows share that column, the last one
    wins. Rows without any column are skipped because they provide no key.
    """
    return {line[0]: ControllerData.from_line(line) for line in string_table if line}

Expand All @@ -191,7 +214,9 @@ def discovery_hp_proliant_da_cntlr(section: ParsedSection) -> DiscoveryResult:
yield from (Service(item=item) for item in section)


def check_hp_proliant_da_cntlr(item: ControllerID, section: ParsedSection) -> CheckResult:
def check_hp_proliant_da_cntlr(
item: ControllerID, params: Params, section: ParsedSection
) -> CheckResult:
if not (subsection := section.get(item)):
yield Result(state=State.UNKNOWN, summary="Controller not found in SNMP data")
return
Expand All @@ -201,9 +226,16 @@ def check_hp_proliant_da_cntlr(item: ControllerID, section: ParsedSection) -> Ch
"Board-Condition": subsection.b_cond,
"Board-Status": subsection.b_status,
}
other_states: Mapping[str, State] = {
"Condition": State(params["condition_other_state"]),
"Board-Condition": State(params["board_condition_other_state"]),
"Board-Status": State(params["board_status_other_state"]),
}

yield Result(
state=State.worst(*(state.to_state() for state in states.values())),
state=State.worst(
*(_monitoring_state(state, other_states[label]) for label, state in states.items())
),
summary=(
f"{', '.join(f'{label}: {state}' for label, state in states.items())} "
f"(Role: {subsection.role}, Model: {subsection.model}, Slot: {subsection.slot}, "
Expand Down Expand Up @@ -231,4 +263,6 @@ def check_hp_proliant_da_cntlr(item: ControllerID, section: ParsedSection) -> Ch
service_name="HW Controller %s",
discovery_function=discovery_hp_proliant_da_cntlr,
check_function=check_hp_proliant_da_cntlr,
check_default_parameters=DEFAULT_PARAMETERS,
check_ruleset_name="hp_proliant_da_cntlr",
)
59 changes: 59 additions & 0 deletions cmk/plugins/hp_proliant/rulesets/hp_proliant_da_cntlr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env python3
# Copyright (C) 2026 Checkmk GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.

from cmk.rulesets.v1 import Help, Title
from cmk.rulesets.v1.form_specs import (
DefaultValue,
DictElement,
Dictionary,
ServiceState,
)
from cmk.rulesets.v1.rule_specs import CheckParameters, HostAndItemCondition, Topic


def _make_form() -> Dictionary:
    """Build the rule form for remapping the "other" value of each field.

    The form offers three optional entries, one per reported field. Each is a
    service-state selector pre-filled with WARN.
    """
    # One title per parameter key. The titles stay wrapped in ``Title(...)``
    # literals here; only the element construction below is shared.
    other_state_titles = {
        "condition_other_state": Title("State when the controller condition is <i>other</i>"),
        "board_condition_other_state": Title("State when the board condition is <i>other</i>"),
        "board_status_other_state": Title("State when the board status is <i>other</i>"),
    }
    return Dictionary(
        help_text=Help(
            "The RAID controllers of HPE ProLiant servers report a condition, a "
            "board condition and a board status. Each of these can take the value "
            "<i>other</i>, meaning the instrument agent does not recognize the "
            "status. HPE ProLiant Gen11 / iLO 6 firmware reports the board "
            "condition as <i>other</i> for perfectly healthy controllers, which "
            "makes the service WARN permanently. Here you can remap the monitoring "
            "state used for the <i>other</i> value of each field independently."
        ),
        elements={
            key: DictElement(
                required=False,
                parameter_form=ServiceState(
                    title=title,
                    prefill=DefaultValue(ServiceState.WARN),
                ),
            )
            for key, title in other_state_titles.items()
        },
    )


# Check-parameter rule for the "hp_proliant_da_cntlr" ruleset. The form is
# passed as a callable (``_make_form``) rather than as a built instance, and
# rules are conditioned on host and item, the item being the controller index.
rule_spec_hp_proliant_da_cntlr = CheckParameters(
    name="hp_proliant_da_cntlr",
    topic=Topic.STORAGE,
    title=Title("HPE ProLiant RAID controller"),
    condition=HostAndItemCondition(item_title=Title("Controller index")),
    parameter_form=_make_form,
)
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from cmk.plugins.hp_proliant.agent_based.hp_proliant_da_cntlr import (
check_hp_proliant_da_cntlr,
ControllerID,
DEFAULT_PARAMETERS,
discovery_hp_proliant_da_cntlr,
parse_hp_proliant_da_cntlr,
)
Expand Down Expand Up @@ -73,7 +74,11 @@ def test_discovery() -> None:
def test_check(item: ControllerID, expected: list[Result]) -> None:
    """Run the check for ``item`` with the default parameters.

    The section is parsed from ``STRING_TABLE`` and the collected results are
    compared to ``expected``.
    """
    # The check function takes ``params`` between ``item`` and ``section``, so
    # it is always called with the plug-in's default parameters here.
    section = parse_hp_proliant_da_cntlr(STRING_TABLE)
    results = list(
        check_hp_proliant_da_cntlr(
            item=item,
            params=DEFAULT_PARAMETERS,
            section=section,
        )
    )
    assert results == expected
Loading