#!/usr/bin/env python3
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2026 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
"""Run migraphx-driver perf under curated MIGraphX environment-variable knobs.

Each knob is toggled in isolation against a clean baseline and the fastest
configuration is written out as a sourceable ``export`` file. The probed
knobs are:

* ``MIGRAPHX_ENABLE_NHWC`` - prefer NHWC layout for convolutions.
* ``MIGRAPHX_SET_GEMM_PROVIDER`` - select the GEMM backend (rocBLAS).
* ``MIGRAPHX_ENABLE_CK`` - enable Composable Kernel GEMMs.
* ``MIGRAPHX_DISABLE_MLIR`` - disable the MLIR code path.
* ``MIGRAPHX_MLIR_USE_SPECIFIC_OPS`` - restrict MLIR to specific ops
  (convolution, dot, fused, attention).
* ``MIGRAPHX_ENABLE_MIOPEN_POOLING`` - use MIOpen for pooling.
* ``MIGRAPHX_ENABLE_REWRITE_DOT`` - rewrite eligible convolutions to dot.

See the MIGraphX documentation for the authoritative list of environment
variables and their semantics.
"""

from __future__ import annotations

import argparse
import math
import os
import re
import shlex
import shutil
import subprocess
import sys
from typing import Iterable

# Extracts the number from a "Total time: <number> ms" line in the driver
# output. Plain decimals and exponent notation (e.g. "1.5e-01") are accepted.
TOTAL_TIME_MS = re.compile(
    r"Total time:\s*([0-9]+(?:\.[0-9]*)?(?:[eE][-+]?[0-9]+)?)\s*ms"
)

_MLIR_OPS_WHITELIST = "convolution,dot,fused,attention"

# Each entry is (display label, environment variable name, value to set).
KNOBS: tuple[tuple[str, str, str], ...] = (
    ("NHWC layout", "MIGRAPHX_ENABLE_NHWC", "1"),
    ("GEMM provider rocBLAS", "MIGRAPHX_SET_GEMM_PROVIDER", "rocblas"),
    ("Enable CK GEMM", "MIGRAPHX_ENABLE_CK", "1"),
    ("Disable MLIR", "MIGRAPHX_DISABLE_MLIR", "1"),
    ("MLIR use specific ops", "MIGRAPHX_MLIR_USE_SPECIFIC_OPS", _MLIR_OPS_WHITELIST),
    ("Enable MIOpen pooling", "MIGRAPHX_ENABLE_MIOPEN_POOLING", "1"),
    ("Conv->dot rewrite", "MIGRAPHX_ENABLE_REWRITE_DOT", "1"),
)

# Maximum number of trailing characters of driver output echoed on failure.
_LOG_SNIP_LEN = 4000

# File suffixes that identify a positional argument as the model file.
_MODEL_SUFFIXES = (
    ".onnx",
    ".pb",
    ".mxr",
    ".tf",
    ".json",
)


def parse_total_time_ms(output: str) -> float | None:
    """Return the first "Total time" value (in ms) found in *output*.

    Returns ``None`` when *output* contains no matching line.
    """
    found = TOTAL_TIME_MS.search(output)
    if found is None:
        return None
    return float(found.group(1))


def resolve_driver(explicit: str | None) -> str:
    """Locate the migraphx-driver binary.

    Lookup order: the *explicit* path, the ``MIGRAPHX_DRIVER`` environment
    variable, ``./bin/migraphx-driver`` under the current working directory
    (only if it is an executable file), then ``PATH``. Prints an error and
    exits with status 1 when nothing is found.
    """
    if explicit:
        return explicit
    env_path = os.environ.get("MIGRAPHX_DRIVER")
    if env_path:
        return env_path
    cwd_driver = os.path.join(os.getcwd(), "bin", "migraphx-driver")
    if os.path.isfile(cwd_driver) and os.access(cwd_driver, os.X_OK):
        return cwd_driver
    which = shutil.which("migraphx-driver")
    if which:
        return which
    print(
        "error: migraphx-driver not found. Try building first (./bin/migraphx-driver "
        "from the build tree), pass --driver PATH, or set MIGRAPHX_DRIVER.",
        file=sys.stderr,
    )
    sys.exit(1)


def note_cleared_parent_knobs(knobs: Iterable[tuple[str, str, str]]) -> None:
    """Tell the user which knob variables from their environment are ignored."""
    for _, name, _ in knobs:
        if name in os.environ:
            print(
                f"note: {name} is set in the environment; "
                "it is unset for each autotune run for an isolated comparison.",
                file=sys.stderr,
            )


def scrub_knob_vars(env: dict[str, str], knobs: Iterable[tuple[str, str, str]]) -> None:
    """Remove every knob variable from *env* in place."""
    for _, name, _ in knobs:
        env.pop(name, None)


def log_failed_driver_run(label: str, returncode: int, text: str) -> None:
    """Report a failed driver run on stderr, with the tail of its output."""
    snippet = text.strip()
    if len(snippet) > _LOG_SNIP_LEN:
        # The end of the output is kept because that is where errors show up.
        snippet = "... (truncated)\n" + snippet[-_LOG_SNIP_LEN:]
    print(f"autotune: {label}: driver exit {returncode}", file=sys.stderr)
    if snippet:
        print(snippet, file=sys.stderr)


def run_perf(
    driver: str,
    perf_argv: list[str],
    env_name: str | None,
    env_value: str | None,
    label: str,
) -> float | None:
    """Run the driver once and return its "Total time" in milliseconds.

    The child environment is a copy of the current one with every knob
    variable removed; ``env_name=env_value`` is then set when both are given.
    Returns ``None`` (after logging to stderr) when the driver cannot be
    started, exits non-zero, or prints no parsable total time.
    """
    env = os.environ.copy()
    scrub_knob_vars(env, KNOBS)
    if env_name is not None and env_value is not None:
        env[env_name] = env_value
    try:
        proc = subprocess.run(
            [driver, *perf_argv],
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            check=False,
        )
    except OSError as exc:
        # A missing or non-executable driver is a failed run, not a crash.
        print(f"autotune: {label}: could not run {driver}: {exc}", file=sys.stderr)
        return None
    text = proc.stdout or ""
    parsed = parse_total_time_ms(text)
    if proc.returncode != 0 or parsed is None:
        log_failed_driver_run(label, proc.returncode, text)
        return None
    return parsed


def infer_model_argument(perf_argv: list[str]) -> str | None:
    """Guess which token of *perf_argv* names the model file.

    The first token (the sub-command) is skipped. A non-flag token with a
    known model suffix wins; otherwise the first non-flag token is returned,
    or ``None`` when there is none.
    """
    positional = [tok for tok in perf_argv[1:] if not tok.startswith("-")]
    for tok in positional:
        if tok.lower().endswith(_MODEL_SUFFIXES):
            return tok
    return positional[0] if positional else None


def _format_delta(baseline_ms: float, value_ms: float) -> str | None:
    """Return the signed percent change versus baseline, e.g. ``"-3.21%"``.

    Returns ``None`` when the baseline is not positive (no meaningful ratio).
    """
    if baseline_ms <= 0:
        return None
    pct = (value_ms - baseline_ms) / baseline_ms * 100.0
    return f"{pct:+.2f}%"


def write_config(
    path: str,
    model_file: str,
    baseline_ms: float,
    env_name: str,
    env_value: str,
    winner_ms: float,
) -> None:
    """Write the winning knob to *path* as a shell file that can be sourced.

    The file holds comment lines describing the baseline and winner timings,
    followed by a single ``export`` statement. The value is shell-quoted so
    the file stays valid for values with spaces or shell metacharacters.
    """
    delta = _format_delta(baseline_ms, winner_ms)
    if delta is not None:
        winner_comment = f"# Winner: {winner_ms} ms ({delta})"
    else:
        winner_comment = f"# Winner: {winner_ms} ms (no % delta; baseline was 0 ms)"
    lines = (
        f"# Autotune config for {model_file}",
        f"# Baseline: {baseline_ms} ms",
        winner_comment,
        "#",
        "# Source this file before running migraphx-driver / your application.",
        f"export {env_name}={shlex.quote(env_value)}",
        "",  # makes the joined text end with a newline
    )
    with open(path, "w", encoding="utf-8") as out_f:
        out_f.write("\n".join(lines))


def default_config_path(perf_argv: list[str], explicit: str | None) -> str:
    """Return the config output path.

    Uses *explicit* when given, else ``<model>.tune`` next to the inferred
    model argument, else ``migraphx_perf.tune``.
    """
    if explicit:
        return explicit
    model = infer_model_argument(perf_argv)
    if model:
        return model + ".tune"
    return "migraphx_perf.tune"


def main() -> None:
    """Benchmark the baseline and each knob, print a table, write the winner."""
    parser = argparse.ArgumentParser(
        description=(
            "Sweep a curated set of MIGraphX environment variables (one at a time), "
            "run migraphx-driver perf for each, and report the fastest configuration."
        ),
        epilog=(
            "Example: %(prog)s --driver ./build/bin/migraphx-driver perf model.onnx "
            "--iterations 50 --cpu --log-level error"
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # Unknown flags are forwarded to the driver, so a driver flag must not
        # be consumed here as an abbreviation of --driver/--output.
        allow_abbrev=False,
    )
    parser.add_argument(
        "--driver",
        metavar="PATH",
        help=(
            "migraphx-driver binary (default: MIGRAPHX_DRIVER, else ./bin/migraphx-driver "
            "under the current working directory if present and executable, else PATH)"
        ),
    )
    parser.add_argument(
        "-o",
        "--output",
        metavar="PATH",
        help=(
            "Write winning exports here (default: <model>.tune, or "
            "migraphx_perf.tune when no model argument is found)"
        ),
    )
    args, perf_argv = parser.parse_known_args()
    if not perf_argv:
        parser.error(
            "missing perf invocation, e.g. perf <model> [--iterations N] [...]; "
            "pass driver flags after script options."
        )

    perf_argv = list(perf_argv)
    if perf_argv[0] != "perf":
        perf_argv.insert(0, "perf")

    driver = resolve_driver(args.driver)
    note_cleared_parent_knobs(KNOBS)

    # Row 0 is the baseline (no knob set); the rest are one knob each.
    rows: list[tuple[str, str | None, str | None]] = [("baseline", None, None), *KNOBS]

    model_display = infer_model_argument(perf_argv) or "(unknown)"
    print(f"Autotune: {len(rows)} configurations on {model_display}")

    label_width = max(len(row[0]) for row in rows) + 2
    times: list[float | None] = []
    for index, (label, env_name, env_value) in enumerate(rows, start=1):
        print(f"[{index}/{len(rows)}] {label} ... ", end="", flush=True)
        t_ms = run_perf(driver, perf_argv, env_name, env_value, label)
        times.append(t_ms)
        print("failed" if t_ms is None else f"{t_ms} ms")

    baseline = times[0]
    if baseline is None:
        print("error: baseline failed; cannot rank configurations.", file=sys.stderr)
        sys.exit(1)

    # Failed runs rank last; on a tie the earliest row (baseline first) wins.
    win_index = min(
        range(len(times)),
        key=lambda i: times[i] if times[i] is not None else math.inf,
    )

    print("\nResults:")
    for i, ((label, env_name, _), t_ms) in enumerate(zip(rows, times)):
        line = f"  {label:<{label_width}}"
        if t_ms is None:
            print(f"{line}failed")
            continue
        line += f"{t_ms} ms"
        if env_name is not None:
            delta = _format_delta(baseline, t_ms)
            line += f"  {delta}" if delta is not None else "  n/a"
            if i == win_index:
                line += "  <-- best"
        print(line)

    _, win_env, win_val = rows[win_index]
    win_t = times[win_index]
    if win_env is None or win_val is None or win_t is None:
        print("\nBaseline is best; no config written.")
        return

    out_path = default_config_path(perf_argv, args.output)
    write_config(out_path, model_display, baseline, win_env, win_val, win_t)
    print(f"\nConfig written: {out_path}")


if __name__ == "__main__":
    main()