@@ -31,6 +31,7 @@ use core::arch::asm;
3131/// of register allocation on `x86`, we explicitly specify registers to use.
3232#[ cfg( all( feature = "inline-asm" , target_arch = "aarch64" ) ) ]
3333pub fn compress ( state : & mut [ u32 ; 5 ] , blocks : & [ [ u8 ; 64 ] ] ) {
34+ let mut out_state = [ 0u32 ; 5 ] ;
3435 // SAFETY: inline-assembly
3536 unsafe {
3637 asm ! (
@@ -57,9 +58,10 @@ pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
5758 // original code:
5859 // ldr q5, [x0]
5960 // ldr s16, [x0, 16]
60- // mov v6.16b, v5.16b
61- in( q5) state[ 0 ..4 ] ,
62- in( s16) state[ 4 ] ,
61+ // the state operands are no longer declared inline here; they are listed with the other operands at the end of this asm! invocation
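62+ // `mov v6.16b, v5.16b` copies all 16 bytes of v5 into v6 (v6 is the same register as q6).
63+ // In the original assembly q5 was loaded from the state pointer (`ldr q5, [x0]`), i.e. state[0..4],
64+ // so v6 is the mutable working copy of the first 4 words of the state (confirmed).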
6365 "mov v6.16b, v5.16b" ,
6466
6567 // Load block in registers
@@ -68,12 +70,10 @@ pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
6870 // ldr q1, [x1, 16]
6971 // ldr q2, [x1, 32]
7072 // ldr q3, [x1, 48]
71- in( q0) blocks[ 0 ] [ 0 ..16 ] ,
72- in( q1) blocks[ 0 ] [ 16 ..32 ] ,
73- in( q2) blocks[ 0 ] [ 32 ..48 ] ,
74- in( q3) blocks[ 0 ] [ 48 ..64 ] ,
73+ // the block operands (q0-q3) are now declared with the other operands at the end of this asm! invocation
7574
7675 // from original code: TODO: only do that on little endian
76+ // rev32 reverses the byte order within each 32-bit word, flipping the block's words from little to big endian
7777 "rev32 v0.16b, v0.16b" ,
7878 "rev32 v1.16b, v1.16b" ,
7979 "rev32 v2.16b, v2.16b" ,
@@ -230,16 +230,16 @@ pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
230230 // Update state
231231 "add v6.4s, v6.4s, v5.4s" ,
232232 // source code: str q6, [x0]
233- out ( q6 ) state [ 0 .. 4 ] ,
233+ // this now happens at the bottom
234234 "add v16.2s, v16.2s, v17.2s" ,
235235 // source code: str s16, [x0, 16]
236- out ( s16 ) state [ 4 ] ,
236+ // this now happens at the bottom
237237
238238 "ret" , // TODO is this right
239239
240240 ".align 4" , // TODO ummm alignment...
241241 ".K0:" , // TODO are labels just the same in inline asm in rust?
242- ".word 0x5A827999"
242+ ".word 0x5A827999" ,
243243 ".word 0x5A827999" ,
244244 ".word 0x5A827999" ,
245245 ".word 0x5A827999" ,
@@ -259,6 +259,21 @@ pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
259259 ".word 0xCA62C1D6" ,
260260 ".word 0xCA62C1D6" ,
261261
262+ // state ins and outs
263+ in( "q4" ) state. as_mut_ptr( ) ,
264+ inout( "s16" ) state[ 4 ] ,
265+ lateout( "q6" ) state as * mut u32 ,
266+ // blocks in
267+ in( "q0" ) blocks[ 0 ] [ 0 ..16 ] . as_ptr( ) ,
268+ in( "q1" ) blocks[ 0 ] [ 16 ..32 ] . as_ptr( ) ,
269+ in( "q2" ) blocks[ 0 ] [ 32 ..48 ] . as_ptr( ) ,
270+ in( "q3" ) blocks[ 0 ] [ 48 ..64 ] . as_ptr( ) ,
271+ // some clobbers
272+ out( "q5" ) _,
273+ out( "s17" ) _,
274+ out( "s18" ) _,
275+ out( "q19" ) _,
276+ // TODO make sure there aren't any other clobbers
262277 ) ;
263278 } ;
264279}
0 commit comments