From 4916383661dccb994c464911c134c32ad2a0fdce Mon Sep 17 00:00:00 2001 From: Ali Afzal Date: Wed, 10 Jun 2026 14:40:30 -0700 Subject: [PATCH] Add OSS CI to cross-compile and run the Cadence Xtensa backend The upstream executor_runner cannot cross-compile to Xtensa because gflags pulls in mkdir(2), absent from Xtensa newlib. Add cadence_executor_runner, a gflags-free ExecuTorch runner for the Cadence Xtensa cores targeting the Instruction Set Simulator (xt-run): it uses plain argv parsing like the Arm and NXP backends, loads a .pte via xt-run semi-hosting, runs the first method with all-ones inputs, and prints outputs. EXECUTORCH_BUILD_CADENCE_RUNNER builds it, linking cadence_ops_lib transitively (no --whole-archive, which would double-run static kernel registration); -lidma is linked only for Vision/Fusion-G3 cores, whose ops reference iDMA and whose LSPs ship libidma, while HiFi4 does not. Also register op_quantized_depthwise_conv1d_{ncl,nlc}.cpp in the HiFi4 operators CMakeLists, which codegen references (omitting the sources broke the cross-compile link). Add an xtensa-build job to the Cadence Build & Test workflow (build-cadence-runner.yml), alongside the existing host cpu-build/cpu-test, to cross-compile the backend for the Xtensa cores. It is a build stage producing a runner artifact; the ISS test stage follows separately (cf. cpu-build -> cpu-test). The Xtensa toolchain and core configs are licensed and fetched at runtime from an auth-gated object store via a short-lived OIDC credential; the role, region, and store are supplied through CI variables and are not committed. setup-xtensa-tools.sh downloads and installs the toolchain/core for a backend, rewrites the vendor params to local paths, and exports the Xtensa env; build-cadence-xtensa.sh cross-compiles cadence_executor_runner. The job builds a [hifi4, vision] matrix and uploads the runner. fusion_g3 is omitted from the matrix until the upstream fusion_g3 <-> nnlib API skew is fixed (its runner does not link). --- .ci/scripts/build-cadence-xtensa.sh | 79 +++++++ .ci/scripts/setup-xtensa-tools.sh | 164 +++++++++++++++ .github/workflows/_xtensa_build.yml | 94 +++++++++ .github/workflows/build-cadence-runner.yml | 22 ++ backends/cadence/CMakeLists.txt | 35 ++++ backends/cadence/cadence_executor_runner.cpp | 198 ++++++++++++++++++ .../cadence/hifi/operators/CMakeLists.txt | 2 + 7 files changed, 594 insertions(+) create mode 100755 .ci/scripts/build-cadence-xtensa.sh create mode 100755 .ci/scripts/setup-xtensa-tools.sh create mode 100644 .github/workflows/_xtensa_build.yml create mode 100644 backends/cadence/cadence_executor_runner.cpp diff --git a/.ci/scripts/build-cadence-xtensa.sh b/.ci/scripts/build-cadence-xtensa.sh new file mode 100755 index 00000000000..bb406528bea --- /dev/null +++ b/.ci/scripts/build-cadence-xtensa.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +# +# Cross-compile cadence_executor_runner for a Cadence Xtensa core and (by +# default) smoke-test it on the Instruction Set Simulator with a trivial model. +# +# Requires the Xtensa toolchain env to already be set (run +# .ci/scripts/setup-xtensa-tools.sh first): XTENSA_TOOLCHAIN, +# TOOLCHAIN_VER, XTENSA_SYSTEM, XTENSA_CORE, XTENSAD_LICENSE_FILE, +# CADENCE_OPT_FLAG, and xt-clang on PATH. +# +# Usage: +# .ci/scripts/build-cadence-xtensa.sh [--no-run] +# --no-run : compile only, skip the ISS smoke test + +set -euo pipefail + +RUN_SMOKE=1 +[[ "${1:-}" == "--no-run" ]] && RUN_SMOKE=0 + +: "${XTENSA_TOOLCHAIN:?run setup-xtensa-tools.sh first}" +: "${TOOLCHAIN_VER:?run setup-xtensa-tools.sh first}" +: "${XTENSA_CORE:?run setup-xtensa-tools.sh first}" +: "${CADENCE_OPT_FLAG:?run setup-xtensa-tools.sh first}" + +NPROC=$(nproc) +echo "=== building cadence_executor_runner for ${XTENSA_CORE} (${CADENCE_OPT_FLAG}) ===" +xt-clang --version | head -1 + +rm -rf cmake-out +CXXFLAGS="-fno-exceptions -fno-rtti" cmake \ + -DCMAKE_TOOLCHAIN_FILE=./backends/cadence/cadence.cmake \ + -DCMAKE_INSTALL_PREFIX=cmake-out \ + -DCMAKE_BUILD_TYPE=Release \ + -DEXECUTORCH_BUILD_CADENCE=ON \ + "-D${CADENCE_OPT_FLAG}=ON" \ + -DEXECUTORCH_BUILD_PORTABLE_OPS=ON \ + -DEXECUTORCH_BUILD_CADENCE_RUNNER=ON \ + -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \ + -DEXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL=ON \ + -DEXECUTORCH_ENABLE_LOGGING=ON \ + -DEXECUTORCH_BUILD_PTHREADPOOL=OFF \ + -DEXECUTORCH_BUILD_CPUINFO=OFF \ + -DEXECUTORCH_USE_DL=OFF \ + -DEXECUTORCH_BUILD_KERNELS_LLM=OFF \ + -DEXECUTORCH_BUILD_DEVTOOLS=OFF \ + -DHAVE_FNMATCH_H=OFF \ + -DFLATCC_ALLOW_WERROR=OFF \ + -DPYTHON_EXECUTABLE="$(which python3)" \ + -Bcmake-out . + +cmake --build cmake-out --target cadence_executor_runner -j"${NPROC}" + +RUNNER="cmake-out/backends/cadence/cadence_executor_runner" +if [[ ! -f "${RUNNER}" ]]; then + echo "ERROR: ${RUNNER} was not produced" >&2 + exit 1 +fi +command -v file >/dev/null 2>&1 && file "${RUNNER}" || true +echo "Build OK: ${RUNNER}" + +if [[ "${RUN_SMOKE}" == "0" ]]; then + echo "Skipping ISS smoke test (--no-run)." + exit 0 +fi + +echo "=== ISS smoke test: export add.pte and run on xt-run --turbo ===" +python3 -m examples.portable.scripts.export --model_name=add >/dev/null +LOG=$(mktemp) +xt-run --turbo "${RUNNER}" --model_path=add.pte 2>&1 | tee "${LOG}" +if ! grep -q "Model executed successfully" "${LOG}"; then + echo "ERROR: ISS smoke test did not report success for ${XTENSA_CORE}" >&2 + exit 1 +fi +echo "ISS smoke test passed for ${XTENSA_CORE}." diff --git a/.ci/scripts/setup-xtensa-tools.sh b/.ci/scripts/setup-xtensa-tools.sh new file mode 100755 index 00000000000..8510c32c859 --- /dev/null +++ b/.ci/scripts/setup-xtensa-tools.sh @@ -0,0 +1,164 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +# +# Download and install the licensed Cadence Xtensa toolchain + core config for +# a given backend, then export the environment that +# backends/cadence/cadence.cmake and xt-run need. +# +# The artifacts (host tools, the core tarball, and the bundled license) cannot +# be hosted publicly, so they are fetched at runtime from an auth-gated object +# store. The store location is provided by the caller via XTENSA_S3_BUCKET (set +# from a CI variable); credentials are obtained out of band before this runs. +# +# Usage: +# XTENSA_S3_BUCKET= .ci/scripts/setup-xtensa-tools.sh +# backend = hifi4 | vision | fusion_g3 +# +# In GitHub Actions this appends the toolchain env to $GITHUB_ENV so later +# steps inherit it. Run locally to populate a workspace for manual builds. +# +# Modeled on .ci/scripts/setup-arm-baremetal-tools.sh. + +set -euo pipefail + +BACKEND="${1:-}" +if [[ -z "${BACKEND}" ]]; then + echo "ERROR: usage: XTENSA_S3_BUCKET= $0 " >&2 + exit 1 +fi + +S3_BUCKET="${XTENSA_S3_BUCKET:-}" +if [[ -z "${S3_BUCKET}" ]]; then + echo "ERROR: XTENSA_S3_BUCKET is not set (provide it from a CI variable)." >&2 + exit 1 +fi +# Objects live flat at the bucket root by default; set these to put toolchains +# and cores under key prefixes instead. +S3_TOOLCHAIN_PREFIX="${XTENSA_S3_TOOLCHAIN_PREFIX:-}" +S3_CORE_PREFIX="${XTENSA_S3_CORE_PREFIX:-}" + +# Per-backend mapping: core tarball, toolchain tarball, core name, OPT flag. +# The toolchain's clang major must match the core's codegen plugin: +# hifi4 / fusion_g3 cores (RI-2022.10, clang 10) -> RI-2022.9 host tools +# vision core (RJ-2025.5, clang 15) -> RJ-2025.5 host tools +case "${BACKEND}" in + hifi4) + CORE_NAME="hifi4_ss_spfpu_7_et_ci2" + CORE_TARBALL="hifi4_ss_spfpu_7_et_ci2_linux.tgz" + TOOLCHAIN_TARBALL="XtensaTools_RI_2022_9_linux.tgz" + TOOLCHAIN_VER="RI-2022.9-linux" + OPT_FLAG="EXECUTORCH_NNLIB_OPT" + ;; + fusion_g3) + CORE_NAME="XRC_FuG3_TYP_SPVFPU_et_c2" + CORE_TARBALL="XRC_FuG3_TYP_SPVFPU_et_c2_linux.tgz" + TOOLCHAIN_TARBALL="XtensaTools_RI_2022_9_linux.tgz" + TOOLCHAIN_VER="RI-2022.9-linux" + OPT_FLAG="EXECUTORCH_FUSION_G3_OPT" + ;; + vision) + CORE_NAME="XRC_Vision_110_AO_et_ci2" + CORE_TARBALL="XRC_Vision_110_AO_et_ci2_linux.tgz" + TOOLCHAIN_TARBALL="XtensaTools_RJ_2025_5_linux.tgz" + TOOLCHAIN_VER="RJ-2025.5-linux" + OPT_FLAG="EXECUTORCH_VISION_OPT" + ;; + *) + echo "ERROR: unknown backend '${BACKEND}' (expected hifi4|vision|fusion_g3)" >&2 + exit 1 + ;; +esac + +XTENSA_ROOT="${XTENSA_ROOT:-/tmp/xtensa}" +TOOLS_ROOT="${XTENSA_ROOT}/tools" # contains -linux/XtensaTools +CORES_ROOT="${XTENSA_ROOT}/cores" # contains -linux/ +REGISTRY_ROOT="${XTENSA_ROOT}/registry/${CORE_NAME}" +DL_DIR="${XTENSA_ROOT}/download" +mkdir -p "${TOOLS_ROOT}" "${CORES_ROOT}" "${REGISTRY_ROOT}" "${DL_DIR}" + +s3_get() { + # $1 = s3 key, $2 = local dest + local key="$1" dest="$2" + echo "Downloading s3://${S3_BUCKET}/${key} ..." + aws s3 cp "s3://${S3_BUCKET}/${key}" "${dest}" --only-show-errors +} + +extract_tgz() { + # $1 = .tgz, $2 = dest dir. Some vendor core tarballs carry trailing bytes + # after a valid gzip stream; gzip then exits 2 ("trailing garbage ignored") + # even though the archive decompressed fully, which aborts `tar xzf`. Key the + # success check off tar's exit, not gzip's. + local tgz="$1" dest="$2" rc + set +o pipefail + gzip -dc "${tgz}" 2>/dev/null | tar xf - -C "${dest}" + rc=${PIPESTATUS[1]} + set -o pipefail + [[ "${rc}" -eq 0 ]] || { echo "ERROR: failed to extract ${tgz} (tar rc=${rc})" >&2; exit 1; } +} + +# 1. Toolchain (host xt-clang/xt-run). Skip re-extract if already present. +if [[ ! -d "${TOOLS_ROOT}/${TOOLCHAIN_VER}/XtensaTools" ]]; then + s3_get "${S3_TOOLCHAIN_PREFIX:+${S3_TOOLCHAIN_PREFIX}/}${TOOLCHAIN_TARBALL}" "${DL_DIR}/${TOOLCHAIN_TARBALL}" + extract_tgz "${DL_DIR}/${TOOLCHAIN_TARBALL}" "${TOOLS_ROOT}" +fi +TOOLCHAIN_HOME="${TOOLS_ROOT}/${TOOLCHAIN_VER}/XtensaTools" +if [[ ! -x "${TOOLCHAIN_HOME}/bin/xt-clang" ]]; then + echo "ERROR: xt-clang not found at ${TOOLCHAIN_HOME}/bin after extract" >&2 + exit 1 +fi + +# 2. Core config (ISA libs, params, examples, bundled magic-key license). +s3_get "${S3_CORE_PREFIX:+${S3_CORE_PREFIX}/}${CORE_TARBALL}" "${DL_DIR}/${CORE_TARBALL}" +extract_tgz "${DL_DIR}/${CORE_TARBALL}" "${CORES_ROOT}" +CORE_DIR=$(echo "${CORES_ROOT}"/*/"${CORE_NAME}") +if [[ ! -d "${CORE_DIR}" ]]; then + echo "ERROR: core dir for ${CORE_NAME} not found under ${CORES_ROOT}" >&2 + exit 1 +fi + +# 3. Build a local Xtensa core registry with the XPG-internal build paths in +# the params file rewritten to our extracted toolchain + core locations. +# The vendor ships params referencing /././home/xpgcust/... build paths. +PARAMS_SRC="${CORE_DIR}/config/${CORE_NAME}-params" +TOOLS_PFX=$(sed -n 's/^install-prefix = //p' "${PARAMS_SRC}" | head -1) +TOOLSUB_PFX=$(sed -n 's/^xtensa-tools = //p' "${PARAMS_SRC}" | head -1) +CFG_PFX=$(sed -n 's/^config-prefix = //p' "${PARAMS_SRC}" | head -1) +sed \ + -e "s|${TOOLS_PFX}|${TOOLCHAIN_HOME}|g" \ + -e "s|${TOOLSUB_PFX}|${TOOLCHAIN_HOME}/Tools|g" \ + -e "s|${CFG_PFX}|${CORE_DIR}|g" \ + "${PARAMS_SRC}" > "${REGISTRY_ROOT}/${CORE_NAME}-params" +ln -sf "${CORE_NAME}-params" "${REGISTRY_ROOT}/default-params" + +LICENSE_FILE="${CORE_DIR}/misc/license.dat" + +# 4. Export environment. cadence.cmake reads XTENSA_TOOLCHAIN/TOOLCHAIN_VER; +# xt-clang/xt-run read XTENSA_SYSTEM/XTENSA_CORE; xtensad reads +# XTENSAD_LICENSE_FILE (the bundled uncounted magic key, no server needed). +emit() { + # Export into the current shell (so callers that `source` this script get the + # vars) and append to $GITHUB_ENV (so later workflow steps inherit them too). + echo "$1" + export "${1?}" + if [[ -n "${GITHUB_ENV:-}" ]]; then echo "$1" >> "${GITHUB_ENV}"; fi +} +echo "=== Xtensa env for backend '${BACKEND}' (core ${CORE_NAME}) ===" +emit "XTENSA_TOOLCHAIN=${TOOLS_ROOT}" +emit "TOOLCHAIN_VER=${TOOLCHAIN_VER}" +emit "XTENSA_SYSTEM=${REGISTRY_ROOT}" +emit "XTENSA_CORE=${CORE_NAME}" +emit "XTENSAD_LICENSE_FILE=${LICENSE_FILE}" +emit "CADENCE_OPT_FLAG=${OPT_FLAG}" +if [[ -n "${GITHUB_PATH:-}" ]]; then + echo "${TOOLCHAIN_HOME}/bin" >> "${GITHUB_PATH}" +fi +export PATH="${TOOLCHAIN_HOME}/bin:${PATH}" + +echo "=== sanity ===" +xt-clang --version 2>&1 | head -1 +xt-run --show-config=cores 2>&1 | sed -n '/available/,/registry/p' | head -6 +echo "Xtensa toolchain ready for ${BACKEND}." diff --git a/.github/workflows/_xtensa_build.yml b/.github/workflows/_xtensa_build.yml new file mode 100644 index 00000000000..ac78323aa3e --- /dev/null +++ b/.github/workflows/_xtensa_build.yml @@ -0,0 +1,94 @@ +# Reusable: cross-compile cadence_executor_runner for one Cadence Xtensa core. +# +# A native job (not linux_job_v2) because the GitHub OIDC token must be minted on +# the runner host: the ACTIONS_ID_TOKEN_REQUEST_* vars do not cross into +# linux_job_v2's docker exec. So the role is assumed on the host, then the build +# runs inside the CI image via docker run with the creds passed in. Binding the +# environment also gives the OIDC token the environment claim. The licensed +# toolchain + core configs are fetched at runtime from an auth-gated store; +# role/region/store come from CI variables and are not committed. +name: xtensa-build + +on: + workflow_call: + inputs: + backend: + description: "Cadence backend to build (hifi4 | vision | fusion_g3)" + required: true + type: string + ref: + description: "Git ref to check out" + required: false + type: string + default: "" + +jobs: + build: + name: ${{ inputs.backend }} + runs-on: linux.2xlarge + environment: cadence + permissions: + id-token: write + contents: read + steps: + - name: Checkout executorch + uses: actions/checkout@v4 + with: + submodules: recursive + ref: ${{ inputs.ref }} + + - name: Calculate docker image + id: calculate-docker-image + uses: pytorch/test-infra/.github/actions/calculate-docker-image@main + with: + docker-image-name: ci-image:executorch-ubuntu-22.04-clang12 + + - name: Pull docker image + run: docker pull "${{ steps.calculate-docker-image.outputs.docker-image }}" + + - name: Assume Cadence artifacts role (host OIDC) + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ vars.CADENCE_CI_AWS_ROLE }} + aws-region: ${{ vars.CADENCE_CI_AWS_REGION }} + + - name: Cross-compile cadence_executor_runner + env: + DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }} + BACKEND: ${{ inputs.backend }} + XTENSA_S3_BUCKET: ${{ vars.CADENCE_CI_S3_BUCKET }} + shell: bash + run: | + set -eux + # OIDC/role assumption already happened on the host above; pass the + # resulting AWS creds and the store/backend into the CI image, where + # the toolchain download + cross-compile run. + docker run --rm \ + -e BACKEND -e XTENSA_S3_BUCKET \ + -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_SESSION_TOKEN \ + -e AWS_DEFAULT_REGION -e AWS_REGION \ + -v "${GITHUB_WORKSPACE}:/work/executorch" -w /work/executorch \ + "${DOCKER_IMAGE}" \ + bash -c ' + set -exo pipefail + eval "$(/opt/conda/bin/conda shell.bash hook)" + conda activate "$(conda env list --json | jq -r ".envs | .[-1]")" + ./install_requirements.sh > /dev/null + pip install --quiet awscli + # hifi4/fusion_g3 optimized kernels need the foss-xtensa nnlib + # sources, which are not vendored in executorch; the cadence + # installer clones them. vision has no nnlib dependency. + if [ "${BACKEND}" != "vision" ]; then + backends/cadence/install_requirements.sh + fi + source .ci/scripts/setup-xtensa-tools.sh "${BACKEND}" + .ci/scripts/build-cadence-xtensa.sh --no-run + chmod -R a+rX cmake-out + ' + + - name: Upload runner + uses: actions/upload-artifact@v4 + with: + name: cadence-xtensa-build-${{ inputs.backend }} + path: cmake-out/backends/cadence/cadence_executor_runner + if-no-files-found: error diff --git a/.github/workflows/build-cadence-runner.yml b/.github/workflows/build-cadence-runner.yml index 6f99958616f..83d0e50d7b1 100644 --- a/.github/workflows/build-cadence-runner.yml +++ b/.github/workflows/build-cadence-runner.yml @@ -50,3 +50,25 @@ jobs: uses: ./.github/workflows/_test_cadence.yml with: ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + + # Cross-compile cadence_executor_runner for each Cadence Xtensa core, one job + # per backend so they show as separate lines (no matrix grouping). Shared logic + # lives in _xtensa_build.yml. fusion_g3 is omitted until the upstream fusion_g3 + # <-> nnlib-FusionG3 API skew is fixed (its runner does not link). + hifi-build: + permissions: + id-token: write + contents: read + uses: ./.github/workflows/_xtensa_build.yml + with: + backend: hifi4 + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + + vision-build: + permissions: + id-token: write + contents: read + uses: ./.github/workflows/_xtensa_build.yml + with: + backend: vision + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} diff --git a/backends/cadence/CMakeLists.txt b/backends/cadence/CMakeLists.txt index 271b4806614..f04bda30a69 100644 --- a/backends/cadence/CMakeLists.txt +++ b/backends/cadence/CMakeLists.txt @@ -97,3 +97,38 @@ else() endif() add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/operators) + +# Cadence executor_runner: cross-compiled ExecuTorch runner for the Xtensa ISS +# (xt-run / xt-run --turbo). Self-contained, gflags-free argv parser, reads .pte +# via xt-run semi-hosting. +# +# Usage: cmake ... -DEXECUTORCH_BUILD_CADENCE_RUNNER=ON xt-run --turbo +# cmake-out/backends/cadence/cadence_executor_runner \ --model_path=add.pte +if(EXECUTORCH_BUILD_CADENCE_RUNNER) + add_executable(cadence_executor_runner cadence_executor_runner.cpp) + target_compile_definitions( + cadence_executor_runner PRIVATE ET_ENABLE_ENUM_STRINGS=0 + ) + target_include_directories( + cadence_executor_runner + PRIVATE ${_common_include_directories} ${CMAKE_BINARY_DIR} + ${CMAKE_BINARY_DIR}/include + ) + # Mirror the upstream executor_runner cadence link list (top-level + # CMakeLists.txt: list(APPEND _executor_runner_libs cadence_ops_lib)). Do NOT + # add --whole-archive: cadence_ops_lib is also pulled transitively, and + # forcing a second copy double-runs its static kernel-registration + # initializers and asserts at runtime. + target_link_libraries( + cadence_executor_runner PRIVATE executorch extension_evalue_util + extension_runner_util cadence_ops_lib + ) + # Vision and Fusion-G3 ops (e.g. op_softmax) reference iDMA scheduling symbols + # and those cores ship libidma in their LSP. HiFi4 and generic cores do not + # use iDMA and their LSPs may not provide libidma, so only link it for the + # cores that need it. + if(EXECUTORCH_VISION_OPT OR EXECUTORCH_FUSION_G3_OPT) + target_link_options(cadence_executor_runner PRIVATE -lidma) + endif() + target_link_options(cadence_executor_runner PRIVATE -static -lm) +endif() diff --git a/backends/cadence/cadence_executor_runner.cpp b/backends/cadence/cadence_executor_runner.cpp new file mode 100644 index 00000000000..57043cd8667 --- /dev/null +++ b/backends/cadence/cadence_executor_runner.cpp @@ -0,0 +1,198 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +/** + * @file + * + * ExecuTorch runner for Cadence Xtensa cores, intended to run on the + * Xtensa Instruction Set Simulator (xt-run / xt-run --turbo). + * + * Reads a .pte from the host filesystem via xt-run semi-hosting, + * executes the first method with all-ones inputs (via + * prepare_input_tensors), and prints the outputs. + * + * Argument parsing is plain argv inspection — gflags pulls in + * mkdir(2), which Xtensa newlib does not declare, breaking + * cross-compile. Mirrors the same approach Arm and NXP take in their + * embedded runners. + * + * Usage: + * xt-run --turbo cadence_executor_runner --model_path=add.pte + * xt-run --mem_model --summary cadence_executor_runner --model_path=add.pte + */ + +#include +#include +#include +#include +#include +// patternlint-disable executorch-cpp-nostdinc +#include + +#include +#include +#include +#include +#include +#include + +using executorch::runtime::Error; +using executorch::runtime::Result; + +namespace { + +// 18 KB has historically been enough for the cadence "hello world" +// models (add, simple MLP). Bump if you hit MemoryAllocator overflow +// at load_method time. +constexpr std::size_t kMethodAllocatorBytes = 18 * 1024U; +uint8_t method_allocator_pool[kMethodAllocatorBytes]; + +const char* parse_model_path(int argc, char** argv) { + constexpr char kFlag[] = "--model_path="; + constexpr std::size_t kFlagLen = sizeof(kFlag) - 1; + for (int i = 1; i < argc; ++i) { + if (std::strncmp(argv[i], kFlag, kFlagLen) == 0) { + // Static so the returned pointer stays valid after parse returns. + static std::string path{argv[i] + kFlagLen}; + return path.c_str(); + } + } + return "model.pte"; +} + +bool slurp(const char* path, std::vector* out) { + FILE* f = std::fopen(path, "rb"); + if (!f) { + ET_LOG(Error, "fopen('%s') failed", path); + return false; + } + std::fseek(f, 0, SEEK_END); + long sz = std::ftell(f); + std::fseek(f, 0, SEEK_SET); + if (sz <= 0) { + ET_LOG(Error, "model file '%s' is empty or stat failed", path); + std::fclose(f); + return false; + } + out->resize(static_cast(sz)); + std::size_t n = std::fread(out->data(), 1, sz, f); + std::fclose(f); + if (static_cast(n) != sz) { + ET_LOG(Error, "fread short on '%s': %zu/%ld", path, n, sz); + return false; + } + ET_LOG(Info, "Loaded %ld bytes from %s", sz, path); + return true; +} + +} // namespace + +int main(int argc, char** argv) { + executorch::runtime::runtime_init(); + + std::vector model; + const char* path = parse_model_path(argc, argv); + if (!slurp(path, &model)) { + return 1; + } + + auto loader = + executorch::extension::BufferDataLoader(model.data(), model.size()); + + Result program = + executorch::runtime::Program::load(&loader); + if (!program.ok()) { + ET_LOG(Error, "Program::load failed: 0x%" PRIx32, program.error()); + return 1; + } + ET_LOG(Info, "Model buffer loaded, has %u methods", program->num_methods()); + + const char* method_name = nullptr; + { + const auto method_name_result = program->get_method_name(0); + ET_CHECK_MSG(method_name_result.ok(), "Program has no methods"); + method_name = *method_name_result; + } + ET_LOG(Info, "Running method %s", method_name); + + Result method_meta = + program->method_meta(method_name); + if (!method_meta.ok()) { + ET_LOG( + Error, + "method_meta('%s') failed: 0x%x", + method_name, + (unsigned int)method_meta.error()); + return 1; + } + + executorch::runtime::MemoryAllocator method_allocator( + sizeof(method_allocator_pool), method_allocator_pool); + + std::vector> planned_buffers; + std::vector> planned_spans; + const std::size_t num_planned = method_meta->num_memory_planned_buffers(); + for (std::size_t id = 0; id < num_planned; ++id) { + const std::size_t buffer_size = static_cast( + method_meta->memory_planned_buffer_size(id).get()); + ET_LOG(Info, "Setting up planned buffer %zu, size %zu", id, buffer_size); + planned_buffers.push_back(std::make_unique(buffer_size)); + planned_spans.push_back({planned_buffers.back().get(), buffer_size}); + } + executorch::runtime::HierarchicalAllocator planned_memory( + {planned_spans.data(), planned_spans.size()}); + + executorch::runtime::MemoryManager memory_manager( + &method_allocator, &planned_memory); + + Result method = + program->load_method(method_name, &memory_manager); + if (!method.ok()) { + ET_LOG( + Error, + "load_method('%s') failed: 0x%" PRIx32, + method_name, + method.error()); + return 1; + } + ET_LOG(Info, "Method loaded."); + + auto cleanup = executorch::extension::prepare_input_tensors(*method); + if (!cleanup.ok()) { + ET_LOG( + Error, + "prepare_input_tensors failed: 0x%x", + (unsigned int)cleanup.error()); + return 1; + } + ET_LOG(Info, "Starting model execution..."); + + Error status = method->execute(); + if (status != Error::Ok) { + ET_LOG(Error, "execute() failed for '%s': 0x%" PRIx32, method_name, status); + return 1; + } + ET_LOG(Info, "Model executed successfully."); + + std::vector outputs(method->outputs_size()); + method->get_outputs(outputs.data(), outputs.size()); + for (std::size_t i = 0; i < outputs.size(); ++i) { + if (!outputs[i].isTensor()) { + ET_LOG(Info, "output[%zu]: non-tensor", i); + continue; + } + const auto& t = outputs[i].toTensor(); + const float* p = t.const_data_ptr(); + const std::size_t n = t.numel() < 20 ? t.numel() : 20; + ET_LOG(Info, "First %zu elements of output %zu:", n, i); + for (std::size_t j = 0; j < n; ++j) { + ET_LOG(Info, " %f", p[j]); + } + } + return 0; +} diff --git a/backends/cadence/hifi/operators/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt index 2e764541319..b5801f5d488 100644 --- a/backends/cadence/hifi/operators/CMakeLists.txt +++ b/backends/cadence/hifi/operators/CMakeLists.txt @@ -134,6 +134,8 @@ add_library( "op_quantized_conv2d_nchw_out.cpp" "op_quantized_conv1d_ncl.cpp" "op_quantized_conv1d_nlc.cpp" + "op_quantized_depthwise_conv1d_ncl.cpp" + "op_quantized_depthwise_conv1d_nlc.cpp" "op_quantized_conv2d_nchw_asym8sxsym8s_asym8s_per_tensor_out.cpp" "op_quantized_conv2d_nchw_asym8uxsym8u_asym8u_per_tensor_out.cpp" "op_quantized_conv2d_nchw_depthwise_asym8sxsym8s_asym8s_per_tensor_out.cpp"