/* * Copyright (c) 2007, Insomniac Games * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of the Insomniac Games nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ----------------------------------------------------------------------------- * * a simple root finding algorithm is used to find the value of 't' that * satisfies: * d|D(t)|^2/dt = 0 * where: * |D(t)| = |p(t)-b(t)| * where p(t) is a point on the line parameterized by t: * p(t) = p1 + t*(p2-p1) * and b(t) is that same point clipped to the boundary of the box. in box- * relative coordinates d|D(t)|^2/dt is the sum of three x,y,z components * each of which looks like this: * * t_lo / * ______/ -->t * / t_hi * / * * t_lo and t_hi are the t values where the line passes through the planes * corresponding to the sides of the box. the algorithm computes d|D(t)|^2/dt * in a piecewise fashion from t=0 to t=1, stopping at the point where * d|D(t)|^2/dt crosses from negative to positive. */ float DistanceLA(const PrimLineseg *linesegA, const PrimAABB *aabbB, Result *result) { qword linesegA_m_vert0 = *((qword *)( &linesegA->m_vert0) ); qword linesegA_m_vert1 = *((qword *)( &linesegA->m_vert1) ); qword aabbB_m_center = *((qword *)( &aabbB->m_center) ); qword aabbB_m_radii = *((qword *)( &aabbB->m_radii) ); qword result_m_ipA = *((qword *)( &result->m_ipA) ); qword result_m_ipB = *((qword *)( &result->m_ipB) ); // find the line segment point first end point in the coordinate frame of the AABB qword line_v0_offset = si_fs( linesegA_m_vert0, aabbB_m_center ); // find the line segment vector from the first point to the last qword line_vector = si_fs( linesegA_m_vert1, linesegA_m_vert0 ); const qword zero = si_from_int( 0 ); const qword one = si_from_int( 1 ); const qword neg_one = si_from_int( -1 ); const qword flt_half = si_from_float( 0.5f ); const qword flt_one = si_from_float( 1.f ); const qword flt_two = si_from_float( 2.f ); const qword flt_neg_one = si_from_float( -1.f ); const qword broadcast_x = si_ila( 0x10203); qword mask_lv_lt_zero = si_fcgt( zero, line_vector ); qword sign = si_selb( flt_one, flt_neg_one, mask_lv_lt_zero ); qword line_vector_mirrored = si_fm( line_vector, sign ); qword line_v0_offset_mirrored = si_fm( line_v0_offset, sign ); // domain (odd calls this region) is -1,0,+1 depending on which side of the box planes each // coordinate is on. t_anchor (ode calls this tanchor) is the next t value at which there is a // transition, or the last one if there are no more. qword neg_aabbB_m_radii = si_fm( aabbB_m_radii, flt_neg_one ); qword mask_lvm_gt_zero = si_fcgt( line_vector_mirrored, zero ); qword mask_lv0om_lt_neg_rad = si_fcgt( neg_aabbB_m_radii, line_v0_offset_mirrored ); qword recip_lvm = si_frest( line_vector_mirrored ); recip_lvm = si_fi( line_vector_mirrored, recip_lvm); qword t_anchor1 = si_fs( neg_aabbB_m_radii, line_v0_offset_mirrored ); t_anchor1 = si_fm( t_anchor1, recip_lvm ); // reused later... qword t_anchor2 = si_fs( aabbB_m_radii, line_v0_offset_mirrored ); t_anchor2 = si_fm( t_anchor2, recip_lvm ); qword t_anchor3 = si_selb( t_anchor2, t_anchor1, mask_lv0om_lt_neg_rad ); qword t_anchor = si_selb( flt_two, t_anchor3, mask_lvm_gt_zero ); qword lv0om_gt_rad_temp = si_fcgt( line_v0_offset_mirrored, aabbB_m_radii ); qword lv0om_gt_rad = si_shli( lv0om_gt_rad_temp, 0x1f ); qword domain_tmp = si_selb( lv0om_gt_rad, neg_one, mask_lv0om_lt_neg_rad ); qword domain = si_selb( zero, domain_tmp, mask_lvm_gt_zero ); // square the line segment vector qword line_vector_mirrored_2 = si_fm( line_vector_mirrored, line_vector_mirrored ); // compute deriv, the solution to d|d|^2/dt for t=0. if it's >= 0 then p1 is the closest point qword lvm2_mul_ta = si_fm( line_vector_mirrored_2, t_anchor ); qword mask_domain_eq_zero = si_ceq( domain, zero ); qword deriv_mult = si_selb( flt_neg_one, zero, mask_domain_eq_zero ); qword deriv = si_fm( lvm2_mul_ta, deriv_mult ); qword deriv_y = si_shlqbyi( deriv, 0x04 ); qword deriv_z = si_shlqbyi( deriv, 0x08 ); deriv = si_fa( deriv, deriv_y ); deriv = si_fa( deriv, deriv_z ); qword mask_deriv_gt_zero = si_fcgt( deriv, zero ); qword mask_deriv_eq_zero = si_fceq( deriv, zero ); qword solution = si_or( mask_deriv_gt_zero, mask_deriv_eq_zero ); // set initial t value qword t = zero; qword done = solution; while( !si_to_uint(done) ) { qword mask_t_lt_one; // t_next is the next t value, next_t in ode // find the point on the line that is at the next clip plane boundary qword t_next = flt_one; qword mask_0_anchor_gt_t = si_fcgt( t_anchor, t ); qword mask_0_anchor_lt_one = si_fcgt( flt_one, t_anchor ); qword mask_0_anchor_lt_tn = si_fcgt( t_next, t_anchor ); qword mask_0_combined = si_and( mask_0_anchor_gt_t, mask_0_anchor_lt_one ); mask_0_combined = si_and( mask_0_combined, mask_0_anchor_lt_tn ); t_next = si_selb( t_next, t_anchor, mask_0_combined ); qword anchor_y = si_shlqbyi( t_anchor, 0x04 ); qword mask_1_anchor_gt_t = si_fcgt( anchor_y, t ); qword mask_1_anchor_lt_one = si_fcgt( flt_one, anchor_y ); qword mask_1_anchor_lt_tn = si_fcgt( t_next, anchor_y ); qword mask_1_combined = si_and( mask_1_anchor_gt_t, mask_1_anchor_lt_one ); mask_1_combined = si_and( mask_1_combined, mask_1_anchor_lt_tn ); t_next = si_selb( t_next, anchor_y, mask_1_combined ); qword anchor_z = si_shlqbyi( t_anchor, 0x08 ); qword mask_2_anchor_gt_t = si_fcgt( anchor_z, t ); qword mask_2_anchor_lt_one = si_fcgt( flt_one, anchor_z ); qword mask_2_anchor_lt_tn = si_fcgt( t_next, anchor_z ); qword mask_2_combined = si_and( mask_2_anchor_gt_t, mask_2_anchor_lt_one ); mask_2_combined = si_and( mask_2_combined, mask_2_anchor_lt_tn ); t_next = si_selb( t_next, anchor_z, mask_2_combined ); // compute the deriv = d|d|^2/dt for the next t t_next = si_shufb( t_next, t_next, broadcast_x); qword mask_domain_eq_zero = si_ceqi( domain, 0x0 ); qword next_sub_anchor = si_fs( t_next, t_anchor ); qword mult_lvm2 = si_fm( line_vector_mirrored_2, next_sub_anchor ); qword deriv_mult = si_selb( flt_one, zero, mask_domain_eq_zero ); qword deriv_next = si_fm( mult_lvm2, deriv_mult ); qword deriv_next_y = si_shlqbyi( deriv_next, 0x04 ); qword deriv_next_z = si_shlqbyi( deriv_next, 0x08 ); deriv_next = si_fa( deriv_next, deriv_next_y ); deriv_next = si_fa( deriv_next, deriv_next_z ); // if the sign of d|d|^2/dt has changed, solution = the crossover point qword mask_dn_gt_zero = si_fcgt( deriv_next, zero ); qword mask_dn_eq_zero = si_fceq( deriv_next, zero ); qword mask_gte_zero = si_or( mask_dn_gt_zero, mask_dn_eq_zero ); if( si_to_uint(mask_gte_zero) ) { qword numer = si_fs( deriv_next, deriv ); qword denom = si_fs( t_next, t ); qword recip_denom = si_frest( denom ); recip_denom = si_fi( denom, recip_denom ); qword derivs = si_fm( numer, recip_denom ); qword recip_derivs = si_frest( derivs ); recip_derivs = si_fi( derivs, recip_derivs ); qword ratio = si_fm( deriv, recip_derivs ); t = si_fs( t, ratio ); solution = one; break; } // advance to the next anchor point / region qword mask_anchor_eq_tn = si_fceq( t_anchor, t_next ); t_anchor = si_selb( t_anchor, t_anchor2, mask_anchor_eq_tn ); qword domain_add_amt = si_selb( zero, one, mask_anchor_eq_tn ); domain = si_a( domain, domain_add_amt ); t = t_next; deriv = deriv_next; // we're done when t >= 1 qword mask_t_gt_one = si_fcgt( t, flt_one ); qword mask_t_eq_one = si_fceq( t, flt_one ); done = si_or( mask_t_gt_one, mask_t_eq_one ); } // p1 is the closest point solution = si_andc( one, solution ); t = si_shufb( t, t, broadcast_x); t = si_selb( t, flt_one, solution ); // compute closest point on the line // note: line_vector=p2-p1 result_m_ipA = si_fma( line_vector, t, linesegA_m_vert0 ); // compute closest point on the box qword scale = si_fma( t, line_vector_mirrored, line_v0_offset_mirrored ); line_v0_offset_mirrored = si_fm( sign, scale ); mask_lv0om_lt_neg_rad = si_fcgt( neg_aabbB_m_radii, line_v0_offset_mirrored ); line_v0_offset_mirrored = si_selb( line_v0_offset_mirrored, neg_aabbB_m_radii, mask_lv0om_lt_neg_rad ); qword mask_lv0om_gt_rad = si_fcgt( line_v0_offset_mirrored, aabbB_m_radii ); qword mask_combined = si_andc( mask_lv0om_gt_rad, mask_lv0om_lt_neg_rad ); line_v0_offset_mirrored = si_selb( line_v0_offset_mirrored, aabbB_m_radii, mask_combined ); result_m_ipB = si_fa( line_v0_offset_mirrored, aabbB_m_center ); // return the distance between the closest points qword delta = si_fs( result_m_ipB, result_m_ipA); qword delta_sqr = si_fm( delta, delta); qword delta_sqr_y = si_shlqbyi( delta_sqr, 0x04 ); qword delta_sqr_z = si_shlqbyi( delta_sqr, 0x08 ); qword len_sqr = si_fa( delta_sqr, delta_sqr_y ); len_sqr = si_fa( len_sqr, delta_sqr_z ); qword est = si_frsqest( len_sqr ); est = si_fi( len_sqr, est ); qword est_sqr = si_fm( est, est ); qword len = si_fnms( len_sqr, est_sqr, flt_one ); // perform single Newton-Raphson step to increase accuracy qword nr = si_fm( est, flt_half ); len = si_fma( len, nr, est ); // we have reciprocal square root, so now multiply to give 'normal' square root len = si_fm( len, len_sqr ); // handle the case where our input component is zero qword mask_eq_zero = si_fcgt( len_sqr, zero ); len = si_selb( zero, len, mask_eq_zero ); // convert results to output format *((qword *)(&result->m_ipA)) = result_m_ipA; *((qword *)(&result->m_ipB)) = result_m_ipB; return si_to_float(len); }