diff --git a/mllm/backends/qnn/QNNAllocator.cpp b/mllm/backends/qnn/QNNAllocator.cpp index dc04b5d0b..0f29d5e59 100644 --- a/mllm/backends/qnn/QNNAllocator.cpp +++ b/mllm/backends/qnn/QNNAllocator.cpp @@ -6,6 +6,8 @@ #include "mllm/utils/Common.hpp" #include "mllm/utils/Log.hpp" #include +#include +#include namespace mllm::qnn { @@ -78,7 +80,25 @@ void QNNAllocator::free(Storage* storage) { void QNNAllocator::registerQnnTensorToSharedBuffer(void* ptr, Qnn_Tensor_t& qnn_tensor) { // Make sure there has a memory that we can register to. - MLLM_RT_ASSERT(qnnMemPtrSet_.count(ptr)); + if (!qnnMemPtrSet_.count(ptr)) { + std::ostringstream dims; + dims << "["; + const auto rank = QNN_TENSOR_GET_RANK(qnn_tensor); + const auto* shape = QNN_TENSOR_GET_DIMENSIONS(qnn_tensor); + for (uint32_t i = 0; i < rank; ++i) { dims << (i == 0 ? "" : ",") << shape[i]; } + dims << "]"; + std::fprintf(stderr, + "QNN shared-buffer register failed: tensor='%s', ptr=%p, dtype=%d, rank=%u, dims=%s is not owned by " + "QNNAllocator (owned ptr count=%zu)\n", + QNN_TENSOR_GET_NAME(qnn_tensor) ? QNN_TENSOR_GET_NAME(qnn_tensor) : "", ptr, + static_cast(QNN_TENSOR_GET_DATA_TYPE(qnn_tensor)), rank, dims.str().c_str(), qnnMemPtrSet_.size()); + std::fflush(stderr); + MLLM_ERROR("QNN shared-buffer register failed: tensor='{}', ptr={}, dtype={}, rank={}, dims={} is not owned by " + "QNNAllocator (owned ptr count={})", + QNN_TENSOR_GET_NAME(qnn_tensor) ? QNN_TENSOR_GET_NAME(qnn_tensor) : "", ptr, + static_cast(QNN_TENSOR_GET_DATA_TYPE(qnn_tensor)), rank, dims.str(), qnnMemPtrSet_.size()); + MLLM_RT_ASSERT(qnnMemPtrSet_.count(ptr)); + } // if already registered, just set the mem handle if (ptrToFdAndMemHandleMap_.count(ptr) > 0) { @@ -90,7 +110,14 @@ void QNNAllocator::registerQnnTensorToSharedBuffer(void* ptr, Qnn_Tensor_t& qnn_ // Get the file id of this memory space. int mem_fd = rpcmem_to_fd(ptr); - MLLM_RT_ASSERT(mem_fd != -1); + if (mem_fd == -1) { + std::fprintf(stderr, "QNN shared-buffer register failed: rpcmem_to_fd returned -1 for tensor='%s', ptr=%p\n", + QNN_TENSOR_GET_NAME(qnn_tensor) ? QNN_TENSOR_GET_NAME(qnn_tensor) : "", ptr); + std::fflush(stderr); + MLLM_ERROR("QNN shared-buffer register failed: rpcmem_to_fd returned -1 for tensor='{}', ptr={}", + QNN_TENSOR_GET_NAME(qnn_tensor) ? QNN_TENSOR_GET_NAME(qnn_tensor) : "", ptr); + MLLM_RT_ASSERT(mem_fd != -1); + } // Make qnn memory descriptor. Set ION. Qnn_MemDescriptor_t mem_descriptor = QNN_MEM_DESCRIPTOR_INIT; @@ -106,7 +133,24 @@ void QNNAllocator::registerQnnTensorToSharedBuffer(void* ptr, Qnn_Tensor_t& qnn_ // Register to QNN memory Qnn_MemHandle_t mem_handle = QNN_TENSOR_GET_MEM_HANDLE(qnn_tensor); - MLLM_RT_ASSERT_EQ(QNN_SUCCESS, qnnInterface_.memRegister(context_, &mem_descriptor, 1u, &mem_handle)); + Qnn_ErrorHandle_t status = qnnInterface_.memRegister(context_, &mem_descriptor, 1u, &mem_handle); + if (QNN_SUCCESS != status) { + std::ostringstream dims; + dims << "["; + const auto rank = QNN_TENSOR_GET_RANK(qnn_tensor); + const auto* shape = QNN_TENSOR_GET_DIMENSIONS(qnn_tensor); + for (uint32_t i = 0; i < rank; ++i) { dims << (i == 0 ? "" : ",") << shape[i]; } + dims << "]"; + std::fprintf(stderr, "QNN memRegister failed: status=%lu, tensor='%s', ptr=%p, fd=%d, dtype=%d, rank=%u, dims=%s\n", + static_cast(status), + QNN_TENSOR_GET_NAME(qnn_tensor) ? QNN_TENSOR_GET_NAME(qnn_tensor) : "", ptr, mem_fd, + static_cast(QNN_TENSOR_GET_DATA_TYPE(qnn_tensor)), rank, dims.str().c_str()); + std::fflush(stderr); + MLLM_ERROR("QNN memRegister failed: status={}, tensor='{}', ptr={}, fd={}, dtype={}, rank={}, dims={}", status, + QNN_TENSOR_GET_NAME(qnn_tensor) ? QNN_TENSOR_GET_NAME(qnn_tensor) : "", ptr, mem_fd, + static_cast(QNN_TENSOR_GET_DATA_TYPE(qnn_tensor)), rank, dims.str()); + MLLM_RT_ASSERT_EQ(QNN_SUCCESS, status); + } QNN_TENSOR_SET_MEM_HANDLE(qnn_tensor, mem_handle); diff --git a/mllm/backends/qnn/QNNBackend.cpp b/mllm/backends/qnn/QNNBackend.cpp index 3900afc35..5cbccb1c2 100644 --- a/mllm/backends/qnn/QNNBackend.cpp +++ b/mllm/backends/qnn/QNNBackend.cpp @@ -657,13 +657,17 @@ void QNNBackend::graphExecute(const std::string& graphName, std::vector& inputs.size(), graphName); return; } + if (outputs.size() != model->getGraphOutputTensorWrappers().size()) { + MLLM_ERROR("Output size mismatch: expected {}, got {} for graph '{}'", model->getGraphOutputTensorWrappers().size(), + outputs.size(), graphName); + return; + } std::vector qnn_inputs; std::vector qnn_outputs; // Prepare QNN inputs for (int i = 0; i < model->getGraphInputTensorWrappers().size(); i++) { auto wrapper = model->getGraphInputTensorWrappers()[i]; - auto& wrapper_tensor = wrapper->getDataContainer(); const auto& runtime_input = inputs[i]; // Validate input tensors @@ -672,9 +676,9 @@ void QNNBackend::graphExecute(const std::string& graphName, std::vector& return; } - // Case of executing retrieved graph created by AOT - // input wrapper is empty, set wrapper's dataContainer(mllm::Tensor) - if (!wrapper->isAlloc()) { wrapper->__setDataContainer(runtime_input); } + // Retrieved AOT graphs may be executed repeatedly with different runtime buffers + // in diagnostic paths. Rebind on every execution so QNN sees the current tensor. + wrapper->__setDataContainer(runtime_input); // Allocate and register the wrapper tensor with QNN allocator // QNNAllocator will handle registered memory descriptor when needed @@ -684,7 +688,6 @@ void QNNBackend::graphExecute(const std::string& graphName, std::vector& // Prepare QNN outputs for (int j = 0; j < model->getGraphOutputTensorWrappers().size(); j++) { auto wrapper = model->getGraphOutputTensorWrappers()[j]; - auto& wrapper_tensor = wrapper->getDataContainer(); const auto& runtime_output = outputs[j]; // Validate output tensors @@ -693,8 +696,9 @@ void QNNBackend::graphExecute(const std::string& graphName, std::vector& return; } - // output wrapper is empty, set wrapper's dataContainer(mllm::Tensor) - if (!wrapper->isAlloc()) { wrapper->__setDataContainer(runtime_output); } + // Retrieved AOT graphs may be executed repeatedly with different runtime buffers + // in diagnostic paths. Rebind on every execution so QNN writes to the current tensor. + wrapper->__setDataContainer(runtime_output); // alloc and register qnn tensor wrapper->alloc(); // QNNAllocator will handle registered memory descriptor diff --git a/mllm/backends/qnn/QNNModel.cpp b/mllm/backends/qnn/QNNModel.cpp index 6fc6110bf..75221e89d 100644 --- a/mllm/backends/qnn/QNNModel.cpp +++ b/mllm/backends/qnn/QNNModel.cpp @@ -3,12 +3,34 @@ #include "mllm/backends/qnn/QNNModel.hpp" #include +#include +#include #include "mllm/backends/qnn/QNNTypeMacros.hpp" #include "mllm/backends/qnn/QNNUtils.hpp" #include "mllm/utils/Log.hpp" namespace mllm::qnn { +namespace { + +bool shouldDumpQnnIO() { + const char* flag = std::getenv("MLLM_QNN_DUMP_IO"); + return flag != nullptr && std::string(flag) != "0"; +} + +std::string dimsToString(const std::vector& dims) { + std::ostringstream oss; + oss << "["; + for (size_t i = 0; i < dims.size(); ++i) { + if (i > 0) { oss << ", "; } + oss << dims[i]; + } + oss << "]"; + return oss.str(); +} + +} // namespace + template void freeMultiPtr(Args... args) { (free(args), ...); @@ -112,6 +134,11 @@ ModelError_t QNNModel::loadGraphTensorInfo(const Qnn_Tensor_t* inputTensors, uin inputTensorWrappers_.push_back(wrapper); tensorWrapperMap_[tensorName] = wrapper; + + if (shouldDumpQnnIO()) { + MLLM_INFO("QNN graph {} input[{}]: name='{}', dtype={}, dims={}", graphName_, i, tensorName, + static_cast(QNN_TENSOR_GET_DATA_TYPE(tensor)), dimsToString(dimensions)); + } } // Create wrappers for output tensors @@ -134,6 +161,11 @@ ModelError_t QNNModel::loadGraphTensorInfo(const Qnn_Tensor_t* inputTensors, uin outputTensorWrappers_.push_back(wrapper); tensorWrapperMap_[tensorName] = wrapper; + + if (shouldDumpQnnIO()) { + MLLM_INFO("QNN graph {} output[{}]: name='{}', dtype={}, dims={}", graphName_, i, tensorName, + static_cast(QNN_TENSOR_GET_DATA_TYPE(tensor)), dimsToString(dimensions)); + } } MLLM_INFO("QNNModel::loadGraphTensorInfo() loaded {} input tensors and {} output tensors for graph: {}", numInputTensors, diff --git a/mllm/backends/qnn/QNNUtils.hpp b/mllm/backends/qnn/QNNUtils.hpp index 36fb6a91c..e798b29dd 100644 --- a/mllm/backends/qnn/QNNUtils.hpp +++ b/mllm/backends/qnn/QNNUtils.hpp @@ -209,9 +209,8 @@ class QNNTensorWrapper { bool isAlloc() { return isAlloc_; } void __setDataContainer(const Tensor& tensor) { - MLLM_RT_ASSERT(dataContainer_.isNil()) dataContainer_ = tensor; - if (!tensor.isNil()) { isAlloc_ = true; } + isAlloc_ = !tensor.isNil(); } // Helper to set complex quantization params and manage memory diff --git a/mllm/backends/qnn/aot/QnnWrappersAPI.cpp b/mllm/backends/qnn/aot/QnnWrappersAPI.cpp index 2a2e6010f..8d7a9e501 100644 --- a/mllm/backends/qnn/aot/QnnWrappersAPI.cpp +++ b/mllm/backends/qnn/aot/QnnWrappersAPI.cpp @@ -23,9 +23,18 @@ namespace mllm::qnn::aot { +namespace { + +std::string qnnTensorNameFromIR(const ir::tensor::TensorValue::ptr_t& v) { + if (v && v->hasSymbolAttr()) { return v->getSymbolAttr()->str(); } + return v ? v->name() : ""; +} + +} // namespace + QnnAOTNodeTensor::QnnAOTNodeTensor(const ir::tensor::TensorValue::ptr_t& v, bool force_static_weight) { auto type = parseQnnTensorTypeFromIR(v); - auto name = v->name(); + auto name = parseQnnTensorNameFromIR(v); auto quant = parseQnnQuantizeParamFromIR(v); if (force_static_weight || type == QNN_TENSOR_TYPE_STATIC) { @@ -103,7 +112,9 @@ Qnn_DataType_t QnnAOTNodeTensor::parseQnnDataTypeFromIR(const ir::tensor::Tensor return mllm::qnn::mllmDataTypeToQnnDataType(v->tensor_.dtype()); } -std::string QnnAOTNodeTensor::parseQnnTensorNameFromIR(const ir::tensor::TensorValue::ptr_t& v) { return v->name(); } +std::string QnnAOTNodeTensor::parseQnnTensorNameFromIR(const ir::tensor::TensorValue::ptr_t& v) { + return qnnTensorNameFromIR(v); +} Qnn_QuantizeParams_t QnnAOTNodeTensor::parseQnnQuantizeParamFromIR(const ir::tensor::TensorValue::ptr_t& v) { Qnn_QuantizeParams_t ret = QNN_QUANTIZE_PARAMS_INIT; @@ -139,10 +150,30 @@ Qnn_QuantizeParams_t QnnAOTNodeTensor::parseQnnQuantizeParamFromIR(const ir::ten MLLM_ERROR_EXIT(ExitCode::kCoreError, "SymPerTensor quant recipe has no scale. tensor: {}", v->name()); } - MLLM_RT_ASSERT_EQ(cfg->quant_to_type, kUInt8); + int32_t offset = 0; + switch (cfg->quant_to_type) { + case kUInt8: { + offset = -128; + break; + } + case kUInt16: { + offset = -32768; + break; + } + case kInt8: + case kInt16: { + offset = 0; + break; + } + default: { + MLLM_ERROR_EXIT(ExitCode::kCoreError, "Unsupported SymPerTensor quant target type {} for tensor: {}", + nameOfType(cfg->quant_to_type), v->name()); + } + } - ret.scaleOffsetEncoding = Qnn_ScaleOffset_t{.scale = cfg->scale.item(), .offset = -128}; - MLLM_INFO("Configuring SymPerTensor quantization for tensor: {}, scale: {}", v->name(), cfg->scale.item()); + ret.scaleOffsetEncoding = Qnn_ScaleOffset_t{.scale = cfg->scale.item(), .offset = offset}; + MLLM_INFO("Configuring SymPerTensor quantization for tensor: {}, scale: {}, offset: {}", v->name(), + cfg->scale.item(), offset); break; } default: { @@ -335,8 +366,13 @@ void QnnAOTGraph::addOperation(const QnnAOTNodeOperation::ptr_t& qnn_op) { for (auto& in : qnn_op->inputs) qnn_model_->addTensorWrapper(in->getWrapper()); for (auto& out : qnn_op->outputs) qnn_model_->addTensorWrapper(out->getWrapper()); - qnn_model_->addNode(QNN_OPCONFIG_VERSION_1, qnn_op->name_, qnn_op->package_name_, qnn_op->op_name_, qnn_op->param_tensor, - qnn_op->param_scalar, inputNames, outputNames); + auto add_node_status = + qnn_model_->addNode(QNN_OPCONFIG_VERSION_1, qnn_op->name_, qnn_op->package_name_, qnn_op->op_name_, + qnn_op->param_tensor, qnn_op->param_scalar, inputNames, outputNames); + if (add_node_status != mllm::qnn::MODEL_NO_ERROR) { + MLLM_ERROR_EXIT(ExitCode::kCoreError, "QNN AOT failed to add node {} (op type {}) to graph.", qnn_op->name_, + qnn_op->op_name_); + } op_node_.insert({qnn_op->getName(), qnn_op}); } @@ -686,7 +722,7 @@ void QnnAOTEnv::captureAOTNodeOp(const std::string& qnn_context_name, const std: QnnAOTNodeTensor::ptr_t QnnAOTEnv::captureQnnAOTNodeTensor(const std::string& qnn_context_name, const std::string& graph_name, const ir::tensor::TensorValue::ptr_t& v, bool force_static_weight) { - auto __qnn_tensor_name = v->name(); + auto __qnn_tensor_name = qnnTensorNameFromIR(v); bool __qnn_enable_static_weight = force_static_weight; diff --git a/mllm/backends/qnn/aot/visitor/Matmul.cpp b/mllm/backends/qnn/aot/visitor/Matmul.cpp index b44afd780..90e3f90bf 100644 --- a/mllm/backends/qnn/aot/visitor/Matmul.cpp +++ b/mllm/backends/qnn/aot/visitor/Matmul.cpp @@ -4,6 +4,7 @@ #include "mllm/utils/Common.hpp" #include "mllm/compile/ir/linalg/Op.hpp" #include "mllm/compile/ir/builtin/Attribute.hpp" +#include "mllm/core/aops/MatMulOp.hpp" #include "mllm/backends/qnn/aot/QnnWrappersAPI.hpp" #include "mllm/backends/qnn/aot/visitor/Matmul.hpp" #include "mllm/backends/qnn/aot/passes/AOTCompileContext.hpp" @@ -22,6 +23,11 @@ bool QnnAOTMatMulPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_t& op) MLLM_ERROR("Failed to cast to linalg::MatMulOp"); return false; } + auto aop = dynamic_cast(matmul_op->getAOp()); + if (!aop) { + MLLM_ERROR("Failed to cast AOp to aops::MatMulOp"); + return false; + } MLLM_RETURN_FALSE_IF_NOT(op->getAttr("qnn_graph_name")); auto qnn_graph_name = op->getAttr("qnn_graph_name")->cast_()->data(); @@ -44,6 +50,10 @@ bool QnnAOTMatMulPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_t& op) ->emplaceOutput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, output)) ->setName(matmul_op->getAOp()->getName()); + const auto& options = aop->options(); + qnn_op_node->emplaceParamScalar(QNNParamScalarWrapper::create("transpose_in0", options.transpose_a)); + qnn_op_node->emplaceParamScalar(QNNParamScalarWrapper::create("transpose_in1", options.transpose_b)); + // Register this op node into one graph. env->captureAOTNodeOp(qnn_context_name, qnn_graph_name, qnn_op_node); diff --git a/mllm/backends/qnn/aot/visitor/Repeat.cpp b/mllm/backends/qnn/aot/visitor/Repeat.cpp index e6eb0542a..27a482e8e 100644 --- a/mllm/backends/qnn/aot/visitor/Repeat.cpp +++ b/mllm/backends/qnn/aot/visitor/Repeat.cpp @@ -8,6 +8,7 @@ #include "mllm/backends/qnn/aot/visitor/Repeat.hpp" #include "mllm/backends/qnn/aot/passes/AOTCompileContext.hpp" #include "mllm/core/aops/RepeatOp.hpp" +#include "mllm/core/Tensor.hpp" #include namespace mllm::qnn::aot { @@ -49,36 +50,55 @@ bool QnnAOTRepeatPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_t& op) if (dim < 0) { dim += rank; } - std::vector multiples(rank, 1); + std::vector multiples(rank + 1, 1); if (dim >= 0 && dim < rank) { - multiples[dim] = (uint32_t)repeat_times; + multiples[dim + 1] = (uint32_t)repeat_times; } else { MLLM_ERROR("Invalid dimension for RepeatOp: {}", dim); return false; } - // Create QNN Op Node - // QNN uses "Tile" for repeat - auto qnn_op_node = QnnAOTNodeOperation::create("Tile"); - qnn_op_node->setPackageName("qti.aisw"); - - // Add Input - qnn_op_node->emplaceInput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, input)); - - // Add multiples Param + // mllm Repeat semantics are repeat_interleave along one dimension. QNN Tile + // repeats whole dimensions, so use reshape + tile + reshape: + // [.., D, ..] -> [.., D, 1, ..] -> tile inserted dim -> [.., D * repeat, ..]. + auto expanded_shape = input_shape; + expanded_shape.insert(expanded_shape.begin() + dim + 1, 1); + auto tiled_shape = expanded_shape; + tiled_shape[dim + 1] = repeat_times; + + auto expanded = writer.getContext()->create( + Tensor::empty(expanded_shape, input->tensor_.dtype(), input->tensor_.device())); + expanded->setAttr("quant_recipe", input->getAttr("quant_recipe")); + auto tiled = writer.getContext()->create( + Tensor::empty(tiled_shape, input->tensor_.dtype(), input->tensor_.device())); + tiled->setAttr("quant_recipe", input->getAttr("quant_recipe")); + + auto reshape_in = QnnAOTNodeOperation::create("Reshape"); + reshape_in->setPackageName("qti.aisw"); + reshape_in->emplaceInput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, input)) + ->emplaceOutput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, expanded)) + ->setName(base_op->getName() + ".repeat_interleave_reshape_in"); + env->captureAOTNodeOp(qnn_context_name, qnn_graph_name, reshape_in); + + auto tile = QnnAOTNodeOperation::create("Tile"); + tile->setPackageName("qti.aisw"); + tile->emplaceInput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, expanded)); auto multiplesName = base_op->getName() + ".multiples"; - auto multiplesParam = - QNNParamTensorWrapper::create("multiples", multiplesName, QNN_DATATYPE_UINT_32, std::vector{(uint32_t)rank}); + auto multiplesParam = QNNParamTensorWrapper::create("multiples", multiplesName, QNN_DATATYPE_UINT_32, + std::vector{(uint32_t)multiples.size()}); uint32_t* multiplesData = static_cast(multiplesParam->alloc()); - std::memcpy(multiplesData, multiples.data(), rank * sizeof(uint32_t)); - qnn_op_node->emplaceParamTensor(multiplesParam); - - // Add Output - qnn_op_node->emplaceOutput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, output)) + std::memcpy(multiplesData, multiples.data(), multiples.size() * sizeof(uint32_t)); + tile->emplaceParamTensor(multiplesParam); + tile->emplaceOutput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, tiled)) + ->setName(base_op->getName() + ".repeat_interleave_tile"); + env->captureAOTNodeOp(qnn_context_name, qnn_graph_name, tile); + + auto reshape_out = QnnAOTNodeOperation::create("Reshape"); + reshape_out->setPackageName("qti.aisw"); + reshape_out->emplaceInput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, tiled)) + ->emplaceOutput(env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, output)) ->setName(base_op->getName()); - - // Register - env->captureAOTNodeOp(qnn_context_name, qnn_graph_name, qnn_op_node); + env->captureAOTNodeOp(qnn_context_name, qnn_graph_name, reshape_out); return true; } diff --git a/mllm/backends/qnn/aot/visitor/View.cpp b/mllm/backends/qnn/aot/visitor/View.cpp index e3447edd9..279a16b53 100644 --- a/mllm/backends/qnn/aot/visitor/View.cpp +++ b/mllm/backends/qnn/aot/visitor/View.cpp @@ -35,6 +35,22 @@ bool QnnAOTViewPattern::rewrite(ir::IRWriter& writer, const ir::op_ptr_t& op) { // Output auto output = op->outputs().front()->cast_(); + // mllm ViewOp can be a metadata-only no-op and reuse the same TensorValue name + // for input/output. QNN Reshape cannot write back to the exact same graph + // tensor, so keep true no-op views as aliases. Shape-changing views should be + // traced with enable_ssa=true by model code so that QNN receives a distinct + // output tensor with the new shape. + if (input->name() == output->name()) { + if (input->tensor_.shape() == output->tensor_.shape()) { + env->captureQnnAOTNodeTensor(qnn_context_name, qnn_graph_name, input); + return true; + } + MLLM_ERROR("QNN AOT ViewOp {} changes shape from [{}] to [{}] but input/output share tensor name {}. " + "Use Tensor::view(..., true) for this path.", + view_op->getAOp()->getName(), input->tensor_.shape(), output->tensor_.shape(), input->name()); + return false; + } + // Create Shape Tensor auto output_shape = output->tensor_.shape(); std::vector shape_data; diff --git a/mllm/core/aops/VisionRoPEOp.cpp b/mllm/core/aops/VisionRoPEOp.cpp index fdd0e9200..6197788d5 100644 --- a/mllm/core/aops/VisionRoPEOp.cpp +++ b/mllm/core/aops/VisionRoPEOp.cpp @@ -9,7 +9,7 @@ namespace mllm::aops { -VisionRoPEOp::VisionRoPEOp(const VisionRoPEOpOptions& options) : BaseOp(OpTypes::kSiLU), options_(options) {} +VisionRoPEOp::VisionRoPEOp(const VisionRoPEOpOptions& options) : BaseOp(OpTypes::kVisionRoPE), options_(options) {} void VisionRoPEOp::load(const ParameterFile::ptr_t& ploader) { MLLM_EMPTY_SCOPE; } @@ -31,4 +31,4 @@ void VisionRoPEOp::reshape(const std::vector& inputs, std::vector& inputs, std::vector& outputs) { BaseOp::setup(inputs, outputs); } -} // namespace mllm::aops \ No newline at end of file +} // namespace mllm::aops diff --git a/mllm/mllm.hpp b/mllm/mllm.hpp index 27ea0abe0..35331c1cd 100644 --- a/mllm/mllm.hpp +++ b/mllm/mllm.hpp @@ -330,12 +330,12 @@ inline void print_stack_trace() { const char* name = demangled ? demangled : info.dli_sname; char line[256]; int len = snprintf(line, sizeof(line), "#%d %p %s\n", i, buffer[i], name); - safe_write(line, len); + safe_write(line, std::min(static_cast(std::max(len, 0)), sizeof(line) - 1)); free(demangled); } else { char line[256]; int len = snprintf(line, sizeof(line), "#%d %p\n", i, buffer[i]); - safe_write(line, len); + safe_write(line, std::min(static_cast(std::max(len, 0)), sizeof(line) - 1)); } } #endif