From 9cd3b3403adc2b63fa8d17ee79cef6ec9b75909b Mon Sep 17 00:00:00 2001 From: deuszx Date: Thu, 14 May 2026 12:16:05 +0200 Subject: [PATCH] solidity: bulk memcpy for bcs_deserialize_offset_{bytes,string} Add EvmVersion config (Shanghai default / Cancun / Latest) to CodeGeneratorConfig and route it through to the Solidity backend. The byte-by-byte copy loop in bcs_deserialize_offset_bytes and the corresponding loop in bcs_deserialize_offset_string are now replaced with either: * Cancun / Latest: a single `MCOPY` (EIP-5656). * Shanghai: a word-by-word assembly memcpy (mload/mstore in 32-byte chunks). The trailing partial word writes into padding inside the `new bytes(len)` allocation (which rounds up to 32 bytes), so the write stays within bounds. The identity precompile (0x04) was the original target for Shanghai but `staticcall` makes the enclosing function non-pure. Solidity then rejects every `bcs_deserialize_*` declared `pure`. The word-loop keeps the existing `pure` API and is still far cheaper than the byte loop. Both paths are gated on `len > 0` so empty copies skip the assembly block entirely. Cover both paths with round-trip tests at lengths 0, 1, 31, 32, 33, and 1024 (boundaries where the word-loop and MCOPY diverge from a naive byte loop). --- serde-generate/src/config.rs | 23 +++++++ serde-generate/src/solidity.rs | 57 +++++++++++++--- serde-generate/tests/solidity_runtime.rs | 85 ++++++++++++++++++++++++ 3 files changed, 155 insertions(+), 10 deletions(-) diff --git a/serde-generate/src/config.rs b/serde-generate/src/config.rs index 7a3d330aa..1089e8c5b 100644 --- a/serde-generate/src/config.rs +++ b/serde-generate/src/config.rs @@ -14,6 +14,22 @@ pub struct CodeGeneratorConfig { pub custom_code: CustomCode, pub enums: EnumConfig, pub package_manifest: bool, + /// Target EVM version for the Solidity backend. Ignored by other backends. + pub evm_version: EvmVersion, +} + +/// Target EVM version for the Solidity backend. +/// +/// Selects which EVM instructions the generated code is allowed to use. +/// `Shanghai` (the default) is the conservative choice that runs on every +/// network currently supporting Solidity 0.8.x. `Cancun` (and `Latest`) +/// enables `MCOPY` (EIP-5656) for bulk memory copies. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] +pub enum EvmVersion { + #[default] + Shanghai, + Cancun, + Latest, } #[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq)] @@ -85,9 +101,16 @@ impl CodeGeneratorConfig { output_type: HashMap::new(), }, package_manifest: true, + evm_version: EvmVersion::default(), } } + /// Target EVM version (Solidity backend only). + pub fn with_evm_version(mut self, evm_version: EvmVersion) -> Self { + self.evm_version = evm_version; + self + } + pub fn module_name(&self) -> &str { &self.module_name } diff --git a/serde-generate/src/solidity.rs b/serde-generate/src/solidity.rs index 18211caa3..d1486389b 100644 --- a/serde-generate/src/solidity.rs +++ b/serde-generate/src/solidity.rs @@ -3,7 +3,7 @@ use crate::{ indent::{IndentConfig, IndentedWriter}, - CodeGeneratorConfig, + CodeGeneratorConfig, EvmVersion, }; use heck::SnakeCase; use phf::phf_set; @@ -35,6 +35,37 @@ fn get_data_location(need_memory: bool) -> String { } } +/// Emit a Solidity snippet that bulk-copies `len` bytes from +/// `input[src_off .. src_off + len]` into `dst[0 .. len]`. +/// +/// On Cancun (and later) this expands to a single `MCOPY` (EIP-5656). +/// On Shanghai it expands to a word-by-word assembly memcpy. The trailing +/// partial word writes into the padding bytes that `new bytes(len)` rounds +/// up to (the data slot is allocated in 32-byte chunks), so the write stays +/// within the allocation. The identity precompile (`0x04`) is not usable +/// here because Solidity classifies any `staticcall`/`gas()` use as +/// state-reading and would reject these `pure` deserializers. +fn bytes_copy_snippet(evm_version: EvmVersion, dst: &str, len: &str, src_off: &str) -> String { + let body = match evm_version { + EvmVersion::Shanghai => format!( + r#"let _dst := add({dst}, 0x20) + let _src := add(add(input, 0x20), {src_off}) + let _end := add(_dst, {len}) + for {{ }} lt(_dst, _end) {{ }} {{ + mstore(_dst, mload(_src)) + _dst := add(_dst, 0x20) + _src := add(_src, 0x20) + }}"# + ), + EvmVersion::Cancun | EvmVersion::Latest => { + format!("mcopy(add({dst}, 0x20), add(add(input, 0x20), {src_off}), {len})") + } + }; + format!( + "if ({len} > 0) {{\n assembly (\"memory-safe\") {{\n {body}\n }}\n }}" + ) +} + fn output_generic_bcs_deserialize( out: &mut IndentedWriter, key_name: &str, @@ -158,7 +189,11 @@ impl Primitive { matches!(self, Primitive::Unit | Primitive::Bytes | Primitive::Str) } - pub fn output(&self, out: &mut IndentedWriter) -> Result<()> { + pub fn output( + &self, + out: &mut IndentedWriter, + evm_version: EvmVersion, + ) -> Result<()> { use Primitive::*; match self { Unit => writeln!( @@ -571,6 +606,7 @@ function bcs_deserialize_offset_bytes1(uint256 pos, bytes memory input) )?; } Str => { + let copy = bytes_copy_snippet(evm_version, "result_bytes", "shift", "new_pos"); writeln!( out, r#" @@ -615,9 +651,7 @@ function bcs_deserialize_offset_string(uint256 pos, bytes memory input) }} }} bytes memory result_bytes = new bytes(shift); - for (uint256 i=0; i { + let copy = bytes_copy_snippet(evm_version, "result", "len", "new_pos"); writeln!( out, r#" @@ -647,9 +682,7 @@ function bcs_deserialize_offset_bytes(uint256 pos, bytes memory input) uint256 new_pos; (new_pos, len) = bcs_deserialize_offset_len(pos, input); bytes memory result = new bytes(len); - for (uint256 u=0; u { - primitive.output(out)?; + primitive.output(out, sol_registry.evm_version)?; let full_name = primitive.name(); let need_memory = primitive.need_memory(); output_generic_bcs_deserialize(out, &full_name, &full_name, need_memory)?; @@ -1209,6 +1242,7 @@ struct SolRegistry { /// Maps external type key_names to their qualified module prefix. /// e.g., "Account" → "BridgeTypes" external_modules: HashMap, + evm_version: EvmVersion, } impl SolRegistry { @@ -1618,7 +1652,10 @@ impl<'a> CodeGenerator<'a> { generator: self, }; - let mut sol_registry = SolRegistry::default(); + let mut sol_registry = SolRegistry { + evm_version: self.config.evm_version, + ..SolRegistry::default() + }; // External definitions: module name → list of type names defined in that module. // Types present in both the registry and external_definitions are treated as // external — they are imported rather than generated locally. This is the diff --git a/serde-generate/tests/solidity_runtime.rs b/serde-generate/tests/solidity_runtime.rs index b2a0edad2..7673fc3bf 100644 --- a/serde-generate/tests/solidity_runtime.rs +++ b/serde-generate/tests/solidity_runtime.rs @@ -602,3 +602,88 @@ contract ExampleCode {{ test_contract(bytecode.clone(), fct_args); Ok(()) } + +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +pub struct BytesPayload { + #[serde(with = "serde_bytes")] + data: Vec, +} + +fn run_bytes_roundtrip( + evm_version: serde_generate::EvmVersion, + payload: Vec, +) -> anyhow::Result<()> { + let registry = get_registry_from_type::(); + let dir = tempdir().unwrap(); + let path = dir.path(); + + let test_library_path = path.join("Library.sol"); + { + let mut test_library_file = File::create(&test_library_path)?; + let name = "Library".to_string(); + let config = CodeGeneratorConfig::new(name).with_evm_version(evm_version); + let generator = solidity::CodeGenerator::new(&config); + generator.output(&mut test_library_file, ®istry).unwrap(); + } + + let test_code_path = path.join("test_code.sol"); + { + let mut test_code_file = File::create(&test_code_path)?; + let expected_len = payload.len(); + writeln!( + test_code_file, + r#"/// SPDX-License-Identifier: UNLICENSED +pragma solidity ^0.8.0; + +import "./Library.sol"; + +contract ExampleCode {{ + function test_deserialization(bytes calldata input) external {{ + Library.BytesPayload memory t = Library.bcs_deserialize_BytesPayload(input); + require(t.data.length == {expected_len}, "incorrect bytes length"); + bytes memory input_rev = Library.bcs_serialize_BytesPayload(t); + require(input.length == input_rev.length, "round-trip length mismatch"); + for (uint256 i = 0; i < input.length; i++) {{ + require(input[i] == input_rev[i], "round-trip byte mismatch"); + }} + }} +}} +"# + )?; + } + + let bytecode = get_bytecode(path, "test_code.sol", "ExampleCode")?; + + let t = BytesPayload { data: payload }; + let expected_input = bcs::to_bytes(&t).unwrap(); + + sol! { + function test_deserialization(bytes calldata input); + } + let input = Bytes::copy_from_slice(&expected_input); + let fct_args = test_deserializationCall { input }; + let fct_args = fct_args.abi_encode().into(); + + test_contract(bytecode, fct_args); + Ok(()) +} + +// Cover the bulk-copy code path in bcs_deserialize_offset_bytes at the +// boundaries where the word-loop (Shanghai) and MCOPY (Cancun) implementations +// differ: empty, sub-word, exactly one word, one word + one byte, and a +// kilobyte that exercises the loop at scale. +#[test] +fn test_bytes_copy_shanghai() { + for len in [0_usize, 1, 31, 32, 33, 1024] { + let payload: Vec = (0..len).map(|i| (i & 0xff) as u8).collect(); + run_bytes_roundtrip(serde_generate::EvmVersion::Shanghai, payload).unwrap(); + } +} + +#[test] +fn test_bytes_copy_cancun() { + for len in [0_usize, 1, 31, 32, 33, 1024] { + let payload: Vec = (0..len).map(|i| (i & 0xff) as u8).collect(); + run_bytes_roundtrip(serde_generate::EvmVersion::Cancun, payload).unwrap(); + } +}