________________________________________ From: Mike Day Sent: Monday, March 26, 2007 9:06 AM To: Mike Acton; tech Subject: RE: curiously small code... The vector version of swizzle_lo8 comes out a bit shorter using selection masks… qword swizzle_lo8( qword x, qword y ) { // Create some masks. ilh m20 = ilh( 0xFF00 ); ilh m30 = ilh( 0xF0F0 ); ilh m40 = ilh( 0xCCCC ); ilh m50 = ilh( 0xAAAA ); // Expand(x) -- Shift left each bit by bit index. Zero unused bits. qword x20 = si_shlhi( x, 4 ); qword x21 = si_selb( x, x20, m20 ); qword x30 = si_shlhi( x21, 2 ); qword x31 = si_selb( x21, x30, m30 ); qword x40 = si_shlhi( x31, 1 ); qword x41 = si_selb( x31, x40, m40 ); // Expand(y) -- Shift left each bit by bit index. Zero unused bits. qword y20 = si_shlhi( y, 4 ); qword y21 = si_selb( y, y20, m20 ); qword y30 = si_shlhi( y21, 2 ); qword y31 = si_selb( y21, y30, m30 ); qword y40 = si_shlhi( y31, 1 ); qword y41 = si_selb( y31, y40, m40 ); // Combine. qword z00 = si_shlhi( y41, 1 ); qword z01 = si_selb( x41, z00, m50 ); return z01; } I’ll leave the 16-bit one for the interested reader! Mike