2 ===========================================================================
5 Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
7 This file is part of the Doom 3 GPL Source Code (?Doom 3 Source Code?).
9 Doom 3 Source Code is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
14 Doom 3 Source Code is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
22 In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
24 If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
26 ===========================================================================
29 #include "../precompiled.h"
32 static idCVar lcp_showFailures( "lcp_showFailures", "0", CVAR_SYSTEM | CVAR_BOOL, "show LCP solver failures" );
34 const float LCP_BOUND_EPSILON = 1e-5f;
35 const float LCP_ACCEL_EPSILON = 1e-5f;
36 const float LCP_DELTA_ACCEL_EPSILON = 1e-9f;
37 const float LCP_DELTA_FORCE_EPSILON = 1e-9f;
39 #define IGNORE_UNSATISFIABLE_VARIABLES
41 //===============================================================
45 //===============================================================
47 class idLCP_Square : public idLCP {
49 virtual bool Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex );
52 idMatX m; // original matrix
53 idVecX b; // right hand side
54 idVecX lo, hi; // low and high bounds
55 idVecX f, a; // force and acceleration
56 idVecX delta_f, delta_a; // delta force and delta acceleration
57 idMatX clamped; // LU factored sub matrix for clamped variables
58 idVecX diagonal; // reciprocal of diagonal of U of the LU factored sub matrix for clamped variables
59 int numUnbounded; // number of unbounded variables
60 int numClamped; // number of clamped variables
61 float ** rowPtrs; // pointers to the rows of m
62 int * boxIndex; // box index
63 int * side; // tells if a variable is at the low boundary = -1, high boundary = 1 or inbetween = 0
64 int * permuted; // index to keep track of the permutation
65 bool padded; // set to true if the rows of the initial matrix are 16 byte padded
68 bool FactorClamped( void );
69 void SolveClamped( idVecX &x, const float *b );
70 void Swap( int i, int j );
71 void AddClamped( int r );
72 void RemoveClamped( int r );
73 void CalcForceDelta( int d, float dir );
74 void CalcAccelDelta( int d );
75 void ChangeForce( int d, float step );
76 void ChangeAccel( int d, float step );
77 void GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const;
82 idLCP_Square::FactorClamped
85 bool idLCP_Square::FactorClamped( void ) {
89 for ( i = 0; i < numClamped; i++ ) {
90 memcpy( clamped[i], rowPtrs[i], numClamped * sizeof( float ) );
93 for ( i = 0; i < numClamped; i++ ) {
95 s = idMath::Fabs( clamped[i][i] );
101 diagonal[i] = d = 1.0f / clamped[i][i];
102 for ( j = i + 1; j < numClamped; j++ ) {
106 for ( j = i + 1; j < numClamped; j++ ) {
108 for ( k = i + 1; k < numClamped; k++ ) {
109 clamped[j][k] -= d * clamped[i][k];
119 idLCP_Square::SolveClamped
122 void idLCP_Square::SolveClamped( idVecX &x, const float *b ) {
127 for ( i = 0; i < numClamped; i++ ) {
129 for ( j = 0; j < i; j++ ) {
130 sum -= clamped[i][j] * x[j];
136 for ( i = numClamped - 1; i >= 0; i-- ) {
138 for ( j = i + 1; j < numClamped; j++ ) {
139 sum -= clamped[i][j] * x[j];
141 x[i] = sum * diagonal[i];
150 void idLCP_Square::Swap( int i, int j ) {
156 idSwap( rowPtrs[i], rowPtrs[j] );
157 m.SwapColumns( i, j );
158 b.SwapElements( i, j );
159 lo.SwapElements( i, j );
160 hi.SwapElements( i, j );
161 a.SwapElements( i, j );
162 f.SwapElements( i, j );
164 idSwap( boxIndex[i], boxIndex[j] );
166 idSwap( side[i], side[j] );
167 idSwap( permuted[i], permuted[j] );
172 idLCP_Square::AddClamped
175 void idLCP_Square::AddClamped( int r ) {
179 assert( r >= numClamped );
181 // add a row at the bottom and a column at the right of the factored
182 // matrix for the clamped variables
184 Swap( numClamped, r );
187 for ( i = 0; i < numClamped; i++ ) {
188 sum = rowPtrs[numClamped][i];
189 for ( j = 0; j < i; j++ ) {
190 sum -= clamped[numClamped][j] * clamped[j][i];
192 clamped[numClamped][i] = sum * diagonal[i];
196 for ( i = 0; i <= numClamped; i++ ) {
197 sum = rowPtrs[i][numClamped];
198 for ( j = 0; j < i; j++ ) {
199 sum -= clamped[i][j] * clamped[j][numClamped];
201 clamped[i][numClamped] = sum;
204 diagonal[numClamped] = 1.0f / clamped[numClamped][numClamped];
211 idLCP_Square::RemoveClamped
214 void idLCP_Square::RemoveClamped( int r ) {
216 float *y0, *y1, *z0, *z1;
217 double diag, beta0, beta1, p0, p1, q0, q1, d;
219 assert( r < numClamped );
223 // no need to swap and update the factored matrix when the last row and column are removed
224 if ( r == numClamped ) {
228 y0 = (float *) _alloca16( numClamped * sizeof( float ) );
229 z0 = (float *) _alloca16( numClamped * sizeof( float ) );
230 y1 = (float *) _alloca16( numClamped * sizeof( float ) );
231 z1 = (float *) _alloca16( numClamped * sizeof( float ) );
233 // the row/column need to be subtracted from the factorization
234 for ( i = 0; i < numClamped; i++ ) {
235 y0[i] = -rowPtrs[i][r];
238 memset( y1, 0, numClamped * sizeof( float ) );
241 memset( z0, 0, numClamped * sizeof( float ) );
244 for ( i = 0; i < numClamped; i++ ) {
245 z1[i] = -rowPtrs[r][i];
248 // swap the to be removed row/column with the last row/column
249 Swap( r, numClamped );
251 // the swapped last row/column need to be added to the factorization
252 for ( i = 0; i < numClamped; i++ ) {
253 y0[i] += rowPtrs[i][r];
256 for ( i = 0; i < numClamped; i++ ) {
257 z1[i] += rowPtrs[r][i];
261 // update the beginning of the to be updated row and column
262 for ( i = 0; i < r; i++ ) {
264 beta1 = z1[i] * diagonal[i];
267 for ( j = i+1; j < numClamped; j++ ) {
268 z1[j] -= beta1 * clamped[i][j];
270 for ( j = i+1; j < numClamped; j++ ) {
271 y0[j] -= p0 * clamped[j][i];
273 clamped[r][i] += beta1;
276 // update the lower right corner starting at r,r
277 for ( i = r; i < numClamped; i++ ) {
278 diag = clamped[i][i];
284 if ( diag == 0.0f ) {
285 idLib::common->Printf( "idLCP_Square::RemoveClamped: updating factorization failed\n" );
295 if ( diag == 0.0f ) {
296 idLib::common->Printf( "idLCP_Square::RemoveClamped: updating factorization failed\n" );
303 clamped[i][i] = diag;
306 for ( j = i+1; j < numClamped; j++ ) {
319 for ( j = i+1; j < numClamped; j++ ) {
337 idLCP_Square::CalcForceDelta
339 modifies this->delta_f
342 ID_INLINE void idLCP_Square::CalcForceDelta( int d, float dir ) {
348 if ( numClamped == 0 ) {
352 // get column d of matrix
353 ptr = (float *) _alloca16( numClamped * sizeof( float ) );
354 for ( i = 0; i < numClamped; i++ ) {
355 ptr[i] = rowPtrs[i][d];
359 SolveClamped( delta_f, ptr );
361 // flip force delta based on direction
363 ptr = delta_f.ToFloatPtr();
364 for ( i = 0; i < numClamped; i++ ) {
372 idLCP_Square::CalcAccelDelta
374 modifies this->delta_a and uses this->delta_f
377 ID_INLINE void idLCP_Square::CalcAccelDelta( int d ) {
381 // only the not clamped variables, including the current variable, can have a change in acceleration
382 for ( j = numClamped; j <= d; j++ ) {
383 // only the clamped variables and the current variable have a force delta unequal zero
384 SIMDProcessor->Dot( dot, rowPtrs[j], delta_f.ToFloatPtr(), numClamped );
385 delta_a[j] = dot + rowPtrs[j][d] * delta_f[d];
391 idLCP_Square::ChangeForce
393 modifies this->f and uses this->delta_f
396 ID_INLINE void idLCP_Square::ChangeForce( int d, float step ) {
397 // only the clamped variables and current variable have a force delta unequal zero
398 SIMDProcessor->MulAdd( f.ToFloatPtr(), step, delta_f.ToFloatPtr(), numClamped );
399 f[d] += step * delta_f[d];
404 idLCP_Square::ChangeAccel
406 modifies this->a and uses this->delta_a
409 ID_INLINE void idLCP_Square::ChangeAccel( int d, float step ) {
410 // only the not clamped variables, including the current variable, can have an acceleration unequal zero
411 SIMDProcessor->MulAdd( a.ToFloatPtr() + numClamped, step, delta_a.ToFloatPtr() + numClamped, d - numClamped + 1 );
416 idLCP_Square::GetMaxStep
419 void idLCP_Square::GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const {
423 // default to a full step for the current variable
424 if ( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON ) {
425 maxStep = -a[d] / delta_a[d];
432 // test the current variable
434 if ( lo[d] != -idMath::INFINITY ) {
435 s = ( lo[d] - f[d] ) / dir;
442 if ( hi[d] != idMath::INFINITY ) {
443 s = ( hi[d] - f[d] ) / dir;
451 // test the clamped bounded variables
452 for ( i = numUnbounded; i < numClamped; i++ ) {
453 if ( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) {
454 // if there is a low boundary
455 if ( lo[i] != -idMath::INFINITY ) {
456 s = ( lo[i] - f[i] ) / delta_f[i];
463 } else if ( delta_f[i] > LCP_DELTA_FORCE_EPSILON ) {
464 // if there is a high boundary
465 if ( hi[i] != idMath::INFINITY ) {
466 s = ( hi[i] - f[i] ) / delta_f[i];
476 // test the not clamped bounded variables
477 for ( i = numClamped; i < d; i++ ) {
478 if ( side[i] == -1 ) {
479 if ( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) {
482 } else if ( side[i] == 1 ) {
483 if ( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) {
489 // ignore variables for which the force is not allowed to take any substantial value
490 if ( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON ) {
493 s = -a[i] / delta_a[i];
507 bool idLCP_Square::Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex ) {
508 int i, j, n, limit, limitSide, boxStartIndex;
509 float dir, maxStep, dot, s;
512 // true when the matrix rows are 16 byte padded
513 padded = ((o_m.GetNumRows()+3)&~3) == o_m.GetNumColumns();
515 assert( padded || o_m.GetNumRows() == o_m.GetNumColumns() );
516 assert( o_x.GetSize() == o_m.GetNumRows() );
517 assert( o_b.GetSize() == o_m.GetNumRows() );
518 assert( o_lo.GetSize() == o_m.GetNumRows() );
519 assert( o_hi.GetSize() == o_m.GetNumRows() );
521 // allocate memory for permuted input
522 f.SetData( o_m.GetNumRows(), VECX_ALLOCA( o_m.GetNumRows() ) );
523 a.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
524 b.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
525 lo.SetData( o_lo.GetSize(), VECX_ALLOCA( o_lo.GetSize() ) );
526 hi.SetData( o_hi.GetSize(), VECX_ALLOCA( o_hi.GetSize() ) );
528 boxIndex = (int *)_alloca16( o_x.GetSize() * sizeof( int ) );
529 memcpy( boxIndex, o_boxIndex, o_x.GetSize() * sizeof( int ) );
534 // we override the const on o_m here but on exit the matrix is unchanged
535 m.SetData( o_m.GetNumRows(), o_m.GetNumColumns(), const_cast<float *>(o_m[0]) );
542 // pointers to the rows of m
543 rowPtrs = (float **) _alloca16( m.GetNumRows() * sizeof( float * ) );
544 for ( i = 0; i < m.GetNumRows(); i++ ) {
548 // tells if a variable is at the low boundary, high boundary or inbetween
549 side = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
551 // index to keep track of the permutation
552 permuted = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
553 for ( i = 0; i < m.GetNumRows(); i++ ) {
557 // permute input so all unbounded variables come first
559 for ( i = 0; i < m.GetNumRows(); i++ ) {
560 if ( lo[i] == -idMath::INFINITY && hi[i] == idMath::INFINITY ) {
561 if ( numUnbounded != i ) {
562 Swap( numUnbounded, i );
568 // permute input so all variables using the boxIndex come last
569 boxStartIndex = m.GetNumRows();
571 for ( i = m.GetNumRows() - 1; i >= numUnbounded; i-- ) {
572 if ( boxIndex[i] >= 0 && ( lo[i] != -idMath::INFINITY || hi[i] != idMath::INFINITY ) ) {
574 if ( boxStartIndex != i ) {
575 Swap( boxStartIndex, i );
581 // sub matrix for factorization
582 clamped.SetData( m.GetNumRows(), m.GetNumColumns(), MATX_ALLOCA( m.GetNumRows() * m.GetNumColumns() ) );
583 diagonal.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
585 // all unbounded variables are clamped
586 numClamped = numUnbounded;
588 // if there are unbounded variables
589 if ( numUnbounded ) {
591 // factor and solve for unbounded variables
592 if ( !FactorClamped() ) {
593 idLib::common->Printf( "idLCP_Square::Solve: unbounded factorization failed\n" );
596 SolveClamped( f, b.ToFloatPtr() );
598 // if there are no bounded variables we are done
599 if ( numUnbounded == m.GetNumRows() ) {
600 o_x = f; // the vector is not permuted
605 #ifdef IGNORE_UNSATISFIABLE_VARIABLES
609 // allocate for delta force and delta acceleration
610 delta_f.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
611 delta_a.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
613 // solve for bounded variables
615 for ( i = numUnbounded; i < m.GetNumRows(); i++ ) {
617 // once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
618 if ( i == boxStartIndex ) {
619 for ( j = 0; j < boxStartIndex; j++ ) {
620 o_x[permuted[j]] = f[j];
622 for ( j = boxStartIndex; j < m.GetNumRows(); j++ ) {
623 s = o_x[boxIndex[j]];
624 if ( lo[j] != -idMath::INFINITY ) {
625 lo[j] = - idMath::Fabs( lo[j] * s );
627 if ( hi[j] != idMath::INFINITY ) {
628 hi[j] = idMath::Fabs( hi[j] * s );
633 // calculate acceleration for current variable
634 SIMDProcessor->Dot( dot, rowPtrs[i], f.ToFloatPtr(), i );
637 // if already at the low boundary
638 if ( lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON ) {
643 // if already at the high boundary
644 if ( hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON ) {
649 // if inside the clamped region
650 if ( idMath::Fabs( a[i] ) <= LCP_ACCEL_EPSILON ) {
656 // drive the current variable into a valid region
657 for ( n = 0; n < maxIterations; n++ ) {
660 if ( a[i] <= 0.0f ) {
666 // calculate force delta
667 CalcForceDelta( i, dir );
669 // calculate acceleration delta: delta_a = m * delta_f;
672 // maximum step we can take
673 GetMaxStep( i, dir, maxStep, limit, limitSide );
675 if ( maxStep <= 0.0f ) {
676 #ifdef IGNORE_UNSATISFIABLE_VARIABLES
677 // ignore the current variable completely
678 lo[i] = hi[i] = 0.0f;
683 failed = va( "invalid step size %.4f", maxStep );
689 ChangeForce( i, maxStep );
691 // change acceleration
692 ChangeAccel( i, maxStep );
694 // clamp/unclamp the variable that limited this step
695 side[limit] = limitSide;
696 switch( limitSide ) {
703 f[limit] = lo[limit];
705 RemoveClamped( limit );
710 f[limit] = hi[limit];
712 RemoveClamped( limit );
718 // if the current variable limited the step we can continue with the next variable
724 if ( n >= maxIterations ) {
725 failed = va( "max iterations %d", maxIterations );
734 #ifdef IGNORE_UNSATISFIABLE_VARIABLES
736 if ( lcp_showFailures.GetBool() ) {
737 idLib::common->Printf( "idLCP_Symmetric::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded );
742 // if failed clear remaining forces
744 if ( lcp_showFailures.GetBool() ) {
745 idLib::common->Printf( "idLCP_Square::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded );
747 for ( j = i; j < m.GetNumRows(); j++ ) {
752 #if defined(_DEBUG) && 0
754 // test whether or not the solution satisfies the complementarity conditions
755 for ( i = 0; i < m.GetNumRows(); i++ ) {
757 for ( j = 0; j < m.GetNumRows(); j++ ) {
758 a[i] += rowPtrs[i][j] * f[j];
761 if ( f[i] == lo[i] ) {
762 if ( lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON ) {
765 } else if ( f[i] == hi[i] ) {
766 if ( lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON ) {
769 } else if ( f[i] < lo[i] || f[i] > hi[i] || idMath::Fabs( a[i] ) > 1.0f ) {
777 for ( i = 0; i < f.GetSize(); i++ ) {
778 o_x[permuted[i]] = f[i];
781 // unpermute original matrix
782 for ( i = 0; i < m.GetNumRows(); i++ ) {
783 for ( j = 0; j < m.GetNumRows(); j++ ) {
784 if ( permuted[j] == i ) {
789 m.SwapColumns( i, j );
790 idSwap( permuted[i], permuted[j] );
798 //===============================================================
800 // idLCP_Symmetric MrE
802 //===============================================================
804 class idLCP_Symmetric : public idLCP {
806 virtual bool Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex );
809 idMatX m; // original matrix
810 idVecX b; // right hand side
811 idVecX lo, hi; // low and high bounds
812 idVecX f, a; // force and acceleration
813 idVecX delta_f, delta_a; // delta force and delta acceleration
814 idMatX clamped; // LDLt factored sub matrix for clamped variables
815 idVecX diagonal; // reciprocal of diagonal of LDLt factored sub matrix for clamped variables
816 idVecX solveCache1; // intermediate result cached in SolveClamped
817 idVecX solveCache2; // "
818 int numUnbounded; // number of unbounded variables
819 int numClamped; // number of clamped variables
820 int clampedChangeStart; // lowest row/column changed in the clamped matrix during an iteration
821 float ** rowPtrs; // pointers to the rows of m
822 int * boxIndex; // box index
823 int * side; // tells if a variable is at the low boundary = -1, high boundary = 1 or inbetween = 0
824 int * permuted; // index to keep track of the permutation
825 bool padded; // set to true if the rows of the initial matrix are 16 byte padded
828 bool FactorClamped( void );
829 void SolveClamped( idVecX &x, const float *b );
830 void Swap( int i, int j );
831 void AddClamped( int r, bool useSolveCache );
832 void RemoveClamped( int r );
833 void CalcForceDelta( int d, float dir );
834 void CalcAccelDelta( int d );
835 void ChangeForce( int d, float step );
836 void ChangeAccel( int d, float step );
837 void GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const;
842 idLCP_Symmetric::FactorClamped
845 bool idLCP_Symmetric::FactorClamped( void ) {
847 clampedChangeStart = 0;
849 for ( int i = 0; i < numClamped; i++ ) {
850 memcpy( clamped[i], rowPtrs[i], numClamped * sizeof( float ) );
852 return SIMDProcessor->MatX_LDLTFactor( clamped, diagonal, numClamped );
857 idLCP_Symmetric::SolveClamped
860 void idLCP_Symmetric::SolveClamped( idVecX &x, const float *b ) {
863 SIMDProcessor->MatX_LowerTriangularSolve( clamped, solveCache1.ToFloatPtr(), b, numClamped, clampedChangeStart );
866 SIMDProcessor->Mul( solveCache2.ToFloatPtr(), solveCache1.ToFloatPtr(), diagonal.ToFloatPtr(), numClamped );
869 SIMDProcessor->MatX_LowerTriangularSolveTranspose( clamped, x.ToFloatPtr(), solveCache2.ToFloatPtr(), numClamped );
871 clampedChangeStart = numClamped;
876 idLCP_Symmetric::Swap
879 void idLCP_Symmetric::Swap( int i, int j ) {
885 idSwap( rowPtrs[i], rowPtrs[j] );
886 m.SwapColumns( i, j );
887 b.SwapElements( i, j );
888 lo.SwapElements( i, j );
889 hi.SwapElements( i, j );
890 a.SwapElements( i, j );
891 f.SwapElements( i, j );
893 idSwap( boxIndex[i], boxIndex[j] );
895 idSwap( side[i], side[j] );
896 idSwap( permuted[i], permuted[j] );
901 idLCP_Symmetric::AddClamped
904 void idLCP_Symmetric::AddClamped( int r, bool useSolveCache ) {
907 assert( r >= numClamped );
909 if ( numClamped < clampedChangeStart ) {
910 clampedChangeStart = numClamped;
913 // add a row at the bottom and a column at the right of the factored
914 // matrix for the clamped variables
916 Swap( numClamped, r );
918 // solve for v in L * v = rowPtr[numClamped]
919 if ( useSolveCache ) {
921 // the lower triangular solve was cached in SolveClamped called by CalcForceDelta
922 memcpy( clamped[numClamped], solveCache2.ToFloatPtr(), numClamped * sizeof( float ) );
923 // calculate row dot product
924 SIMDProcessor->Dot( dot, solveCache2.ToFloatPtr(), solveCache1.ToFloatPtr(), numClamped );
928 float *v = (float *) _alloca16( numClamped * sizeof( float ) );
930 SIMDProcessor->MatX_LowerTriangularSolve( clamped, v, rowPtrs[numClamped], numClamped );
931 // add bottom row to L
932 SIMDProcessor->Mul( clamped[numClamped], v, diagonal.ToFloatPtr(), numClamped );
933 // calculate row dot product
934 SIMDProcessor->Dot( dot, clamped[numClamped], v, numClamped );
937 // update diagonal[numClamped]
938 d = rowPtrs[numClamped][numClamped] - dot;
941 idLib::common->Printf( "idLCP_Symmetric::AddClamped: updating factorization failed\n" );
946 clamped[numClamped][numClamped] = d;
947 diagonal[numClamped] = 1.0f / d;
954 idLCP_Symmetric::RemoveClamped
957 void idLCP_Symmetric::RemoveClamped( int r ) {
959 float *addSub, *original, *v, *ptr, *v1, *v2, dot;
960 double sum, diag, newDiag, invNewDiag, p1, p2, alpha1, alpha2, beta1, beta2;
962 assert( r < numClamped );
964 if ( r < clampedChangeStart ) {
965 clampedChangeStart = r;
970 // no need to swap and update the factored matrix when the last row and column are removed
971 if ( r == numClamped ) {
975 // swap the to be removed row/column with the last row/column
976 Swap( r, numClamped );
978 // update the factored matrix
979 addSub = (float *) _alloca16( numClamped * sizeof( float ) );
983 if ( numClamped == 1 ) {
984 diag = rowPtrs[0][0];
985 if ( diag == 0.0f ) {
986 idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
989 clamped[0][0] = diag;
990 diagonal[0] = 1.0f / diag;
994 // calculate the row/column to be added to the lower right sub matrix starting at (r, r)
995 original = rowPtrs[numClamped];
997 addSub[0] = ptr[0] - original[numClamped];
998 for ( i = 1; i < numClamped; i++ ) {
999 addSub[i] = ptr[i] - original[i];
1004 v = (float *) _alloca16( numClamped * sizeof( float ) );
1006 // solve for v in L * v = rowPtr[r]
1007 SIMDProcessor->MatX_LowerTriangularSolve( clamped, v, rowPtrs[r], r );
1009 // update removed row
1010 SIMDProcessor->Mul( clamped[r], v, diagonal.ToFloatPtr(), r );
1012 // if the last row/column of the matrix is updated
1013 if ( r == numClamped - 1 ) {
1014 // only calculate new diagonal
1015 SIMDProcessor->Dot( dot, clamped[r], v, r );
1016 diag = rowPtrs[r][r] - dot;
1017 if ( diag == 0.0f ) {
1018 idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
1021 clamped[r][r] = diag;
1022 diagonal[r] = 1.0f / diag;
1026 // calculate the row/column to be added to the lower right sub matrix starting at (r, r)
1027 for ( i = 0; i < r; i++ ) {
1028 v[i] = clamped[r][i] * clamped[i][i];
1030 for ( i = r; i < numClamped; i++ ) {
1032 sum = clamped[r][r];
1034 sum = clamped[r][r] * clamped[i][r];
1037 for ( j = 0; j < r; j++ ) {
1038 sum += ptr[j] * v[j];
1040 addSub[i] = rowPtrs[r][i] - sum;
1044 // add row/column to the lower right sub matrix starting at (r, r)
1046 v1 = (float *) _alloca16( numClamped * sizeof( float ) );
1047 v2 = (float *) _alloca16( numClamped * sizeof( float ) );
1049 diag = idMath::SQRT_1OVER2;
1050 v1[r] = ( 0.5f * addSub[r] + 1.0f ) * diag;
1051 v2[r] = ( 0.5f * addSub[r] - 1.0f ) * diag;
1052 for ( i = r+1; i < numClamped; i++ ) {
1053 v1[i] = v2[i] = addSub[i] * diag;
1059 // simultaneous update/downdate of the sub matrix starting at (r, r)
1060 n = clamped.GetNumColumns();
1061 for ( i = r; i < numClamped; i++ ) {
1063 diag = clamped[i][i];
1065 newDiag = diag + alpha1 * p1 * p1;
1067 if ( newDiag == 0.0f ) {
1068 idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
1073 beta1 = p1 * alpha1;
1078 newDiag = diag + alpha2 * p2 * p2;
1080 if ( newDiag == 0.0f ) {
1081 idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
1085 clamped[i][i] = newDiag;
1086 diagonal[i] = invNewDiag = 1.0f / newDiag;
1088 alpha2 *= invNewDiag;
1089 beta2 = p2 * alpha2;
1092 // update column below diagonal (i,i)
1093 ptr = clamped.ToFloatPtr() + i;
1095 for ( j = i+1; j < numClamped - 1; j += 2 ) {
1097 float sum0 = ptr[(j+0)*n];
1098 float sum1 = ptr[(j+1)*n];
1100 v1[j+0] -= p1 * sum0;
1101 v1[j+1] -= p1 * sum1;
1103 sum0 += beta1 * v1[j+0];
1104 sum1 += beta1 * v1[j+1];
1106 v2[j+0] -= p2 * sum0;
1107 v2[j+1] -= p2 * sum1;
1109 sum0 += beta2 * v2[j+0];
1110 sum1 += beta2 * v2[j+1];
1112 ptr[(j+0)*n] = sum0;
1113 ptr[(j+1)*n] = sum1;
1116 for ( ; j < numClamped; j++ ) {
1121 sum += beta1 * v1[j];
1124 sum += beta2 * v2[j];
1133 idLCP_Symmetric::CalcForceDelta
1135 modifies this->delta_f
1138 ID_INLINE void idLCP_Symmetric::CalcForceDelta( int d, float dir ) {
1144 if ( numClamped == 0 ) {
1148 // solve force delta
1149 SolveClamped( delta_f, rowPtrs[d] );
1151 // flip force delta based on direction
1153 ptr = delta_f.ToFloatPtr();
1154 for ( i = 0; i < numClamped; i++ ) {
1162 idLCP_Symmetric::CalcAccelDelta
1164 modifies this->delta_a and uses this->delta_f
1167 ID_INLINE void idLCP_Symmetric::CalcAccelDelta( int d ) {
1171 // only the not clamped variables, including the current variable, can have a change in acceleration
1172 for ( j = numClamped; j <= d; j++ ) {
1173 // only the clamped variables and the current variable have a force delta unequal zero
1174 SIMDProcessor->Dot( dot, rowPtrs[j], delta_f.ToFloatPtr(), numClamped );
1175 delta_a[j] = dot + rowPtrs[j][d] * delta_f[d];
1181 idLCP_Symmetric::ChangeForce
1183 modifies this->f and uses this->delta_f
1186 ID_INLINE void idLCP_Symmetric::ChangeForce( int d, float step ) {
1187 // only the clamped variables and current variable have a force delta unequal zero
1188 SIMDProcessor->MulAdd( f.ToFloatPtr(), step, delta_f.ToFloatPtr(), numClamped );
1189 f[d] += step * delta_f[d];
1194 idLCP_Symmetric::ChangeAccel
1196 modifies this->a and uses this->delta_a
1199 ID_INLINE void idLCP_Symmetric::ChangeAccel( int d, float step ) {
1200 // only the not clamped variables, including the current variable, can have an acceleration unequal zero
1201 SIMDProcessor->MulAdd( a.ToFloatPtr() + numClamped, step, delta_a.ToFloatPtr() + numClamped, d - numClamped + 1 );
1206 idLCP_Symmetric::GetMaxStep
1209 void idLCP_Symmetric::GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const {
1213 // default to a full step for the current variable
1214 if ( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON ) {
1215 maxStep = -a[d] / delta_a[d];
1222 // test the current variable
1224 if ( lo[d] != -idMath::INFINITY ) {
1225 s = ( lo[d] - f[d] ) / dir;
1226 if ( s < maxStep ) {
1232 if ( hi[d] != idMath::INFINITY ) {
1233 s = ( hi[d] - f[d] ) / dir;
1234 if ( s < maxStep ) {
1241 // test the clamped bounded variables
1242 for ( i = numUnbounded; i < numClamped; i++ ) {
1243 if ( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) {
1244 // if there is a low boundary
1245 if ( lo[i] != -idMath::INFINITY ) {
1246 s = ( lo[i] - f[i] ) / delta_f[i];
1247 if ( s < maxStep ) {
1253 } else if ( delta_f[i] > LCP_DELTA_FORCE_EPSILON ) {
1254 // if there is a high boundary
1255 if ( hi[i] != idMath::INFINITY ) {
1256 s = ( hi[i] - f[i] ) / delta_f[i];
1257 if ( s < maxStep ) {
1266 // test the not clamped bounded variables
1267 for ( i = numClamped; i < d; i++ ) {
1268 if ( side[i] == -1 ) {
1269 if ( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) {
1272 } else if ( side[i] == 1 ) {
1273 if ( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) {
1279 // ignore variables for which the force is not allowed to take any substantial value
1280 if ( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON ) {
1283 s = -a[i] / delta_a[i];
1284 if ( s < maxStep ) {
1294 idLCP_Symmetric::Solve
1297 bool idLCP_Symmetric::Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex ) {
1298 int i, j, n, limit, limitSide, boxStartIndex;
1299 float dir, maxStep, dot, s;
1302 // true when the matrix rows are 16 byte padded
1303 padded = ((o_m.GetNumRows()+3)&~3) == o_m.GetNumColumns();
1305 assert( padded || o_m.GetNumRows() == o_m.GetNumColumns() );
1306 assert( o_x.GetSize() == o_m.GetNumRows() );
1307 assert( o_b.GetSize() == o_m.GetNumRows() );
1308 assert( o_lo.GetSize() == o_m.GetNumRows() );
1309 assert( o_hi.GetSize() == o_m.GetNumRows() );
1311 // allocate memory for permuted input
1312 f.SetData( o_m.GetNumRows(), VECX_ALLOCA( o_m.GetNumRows() ) );
1313 a.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
1314 b.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
1315 lo.SetData( o_lo.GetSize(), VECX_ALLOCA( o_lo.GetSize() ) );
1316 hi.SetData( o_hi.GetSize(), VECX_ALLOCA( o_hi.GetSize() ) );
1318 boxIndex = (int *)_alloca16( o_x.GetSize() * sizeof( int ) );
1319 memcpy( boxIndex, o_boxIndex, o_x.GetSize() * sizeof( int ) );
1324 // we override the const on o_m here but on exit the matrix is unchanged
1325 m.SetData( o_m.GetNumRows(), o_m.GetNumColumns(), const_cast<float *>(o_m[0]) );
1332 // pointers to the rows of m
1333 rowPtrs = (float **) _alloca16( m.GetNumRows() * sizeof( float * ) );
1334 for ( i = 0; i < m.GetNumRows(); i++ ) {
1338 // tells if a variable is at the low boundary, high boundary or inbetween
1339 side = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
1341 // index to keep track of the permutation
1342 permuted = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
1343 for ( i = 0; i < m.GetNumRows(); i++ ) {
1347 // permute input so all unbounded variables come first
1349 for ( i = 0; i < m.GetNumRows(); i++ ) {
1350 if ( lo[i] == -idMath::INFINITY && hi[i] == idMath::INFINITY ) {
1351 if ( numUnbounded != i ) {
1352 Swap( numUnbounded, i );
1358 // permute input so all variables using the boxIndex come last
1359 boxStartIndex = m.GetNumRows();
1361 for ( i = m.GetNumRows() - 1; i >= numUnbounded; i-- ) {
1362 if ( boxIndex[i] >= 0 && ( lo[i] != -idMath::INFINITY || hi[i] != idMath::INFINITY ) ) {
1364 if ( boxStartIndex != i ) {
1365 Swap( boxStartIndex, i );
1371 // sub matrix for factorization
1372 clamped.SetData( m.GetNumRows(), m.GetNumColumns(), MATX_ALLOCA( m.GetNumRows() * m.GetNumColumns() ) );
1373 diagonal.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
1374 solveCache1.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
1375 solveCache2.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
1377 // all unbounded variables are clamped
1378 numClamped = numUnbounded;
1380 // if there are unbounded variables
1381 if ( numUnbounded ) {
1383 // factor and solve for unbounded variables
1384 if ( !FactorClamped() ) {
1385 idLib::common->Printf( "idLCP_Symmetric::Solve: unbounded factorization failed\n" );
1388 SolveClamped( f, b.ToFloatPtr() );
1390 // if there are no bounded variables we are done
1391 if ( numUnbounded == m.GetNumRows() ) {
1392 o_x = f; // the vector is not permuted
1397 #ifdef IGNORE_UNSATISFIABLE_VARIABLES
1401 // allocate for delta force and delta acceleration
1402 delta_f.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
1403 delta_a.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
1405 // solve for bounded variables
1407 for ( i = numUnbounded; i < m.GetNumRows(); i++ ) {
1409 clampedChangeStart = 0;
1411 // once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
1412 if ( i == boxStartIndex ) {
1413 for ( j = 0; j < boxStartIndex; j++ ) {
1414 o_x[permuted[j]] = f[j];
1416 for ( j = boxStartIndex; j < m.GetNumRows(); j++ ) {
1417 s = o_x[boxIndex[j]];
1418 if ( lo[j] != -idMath::INFINITY ) {
1419 lo[j] = - idMath::Fabs( lo[j] * s );
1421 if ( hi[j] != idMath::INFINITY ) {
1422 hi[j] = idMath::Fabs( hi[j] * s );
1427 // calculate acceleration for current variable
1428 SIMDProcessor->Dot( dot, rowPtrs[i], f.ToFloatPtr(), i );
1431 // if already at the low boundary
1432 if ( lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON ) {
1437 // if already at the high boundary
1438 if ( hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON ) {
1443 // if inside the clamped region
1444 if ( idMath::Fabs( a[i] ) <= LCP_ACCEL_EPSILON ) {
1446 AddClamped( i, false );
1450 // drive the current variable into a valid region
1451 for ( n = 0; n < maxIterations; n++ ) {
1453 // direction to move
1454 if ( a[i] <= 0.0f ) {
1460 // calculate force delta
1461 CalcForceDelta( i, dir );
1463 // calculate acceleration delta: delta_a = m * delta_f;
1464 CalcAccelDelta( i );
1466 // maximum step we can take
1467 GetMaxStep( i, dir, maxStep, limit, limitSide );
1469 if ( maxStep <= 0.0f ) {
1470 #ifdef IGNORE_UNSATISFIABLE_VARIABLES
1471 // ignore the current variable completely
1472 lo[i] = hi[i] = 0.0f;
1477 failed = va( "invalid step size %.4f", maxStep );
1483 ChangeForce( i, maxStep );
1485 // change acceleration
1486 ChangeAccel( i, maxStep );
1488 // clamp/unclamp the variable that limited this step
1489 side[limit] = limitSide;
1490 switch( limitSide ) {
1493 AddClamped( limit, ( limit == i ) );
1497 f[limit] = lo[limit];
1499 RemoveClamped( limit );
1504 f[limit] = hi[limit];
1506 RemoveClamped( limit );
1512 // if the current variable limited the step we can continue with the next variable
1518 if ( n >= maxIterations ) {
1519 failed = va( "max iterations %d", maxIterations );
1528 #ifdef IGNORE_UNSATISFIABLE_VARIABLES
1530 if ( lcp_showFailures.GetBool() ) {
1531 idLib::common->Printf( "idLCP_Symmetric::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded );
1536 // if failed clear remaining forces
1538 if ( lcp_showFailures.GetBool() ) {
1539 idLib::common->Printf( "idLCP_Symmetric::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded );
1541 for ( j = i; j < m.GetNumRows(); j++ ) {
1546 #if defined(_DEBUG) && 0
1548 // test whether or not the solution satisfies the complementarity conditions
1549 for ( i = 0; i < m.GetNumRows(); i++ ) {
1551 for ( j = 0; j < m.GetNumRows(); j++ ) {
1552 a[i] += rowPtrs[i][j] * f[j];
1555 if ( f[i] == lo[i] ) {
1556 if ( lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON ) {
1559 } else if ( f[i] == hi[i] ) {
1560 if ( lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON ) {
1563 } else if ( f[i] < lo[i] || f[i] > hi[i] || idMath::Fabs( a[i] ) > 1.0f ) {
1571 for ( i = 0; i < f.GetSize(); i++ ) {
1572 o_x[permuted[i]] = f[i];
1575 // unpermute original matrix
1576 for ( i = 0; i < m.GetNumRows(); i++ ) {
1577 for ( j = 0; j < m.GetNumRows(); j++ ) {
1578 if ( permuted[j] == i ) {
1583 m.SwapColumns( i, j );
1584 idSwap( permuted[i], permuted[j] );
1592 //===============================================================
1596 //===============================================================
1603 idLCP *idLCP::AllocSquare( void ) {
1604 idLCP *lcp = new idLCP_Square;
1605 lcp->SetMaxIterations( 32 );
1611 idLCP::AllocSymmetric
1614 idLCP *idLCP::AllocSymmetric( void ) {
1615 idLCP *lcp = new idLCP_Symmetric;
1616 lcp->SetMaxIterations( 32 );
1625 idLCP::~idLCP( void ) {
1630 idLCP::SetMaxIterations
1633 void idLCP::SetMaxIterations( int max ) {
1634 maxIterations = max;
1639 idLCP::GetMaxIterations
1642 int idLCP::GetMaxIterations( void ) {
1643 return maxIterations;