.. _program_listing_file_SPlisHSPlasH_Elasticity_Elasticity_Kee2023.cpp: Program Listing for File Elasticity_Kee2023.cpp =============================================== |exhale_lsh| :ref:`Return to documentation for file ` (``SPlisHSPlasH/Elasticity/Elasticity_Kee2023.cpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #include "Elasticity_Kee2023.h" #include "SPlisHSPlasH/Simulation.h" #include "SPlisHSPlasH/Utilities/MathFunctions.h" #include "SPlisHSPlasH/TimeManager.h" #include "Utilities/Timing.h" #include "Utilities/Counting.h" #include #include "Utilities/BinaryFileReaderWriter.h" #include "Utilities/StringTools.h" #include #include #include using namespace SPH; using namespace GenParam; std::string Elasticity_Kee2023::METHOD_NAME = "Kee et al. 2023"; int Elasticity_Kee2023::YOUNGS_MODULUS = -1; int Elasticity_Kee2023::POISSON_RATIO = -1; int Elasticity_Kee2023::FIXED_BOX_MIN = -1; int Elasticity_Kee2023::FIXED_BOX_MAX = -1; int Elasticity_Kee2023::FIXED_BOX2_MIN = -1; int Elasticity_Kee2023::FIXED_BOX2_MAX = -1; int Elasticity_Kee2023::ALPHA = -1; int Elasticity_Kee2023::MAX_NEIGHBORS = -1; int Elasticity_Kee2023::SOLVER_TYPE = -1; int Elasticity_Kee2023::LBFGS_WINDOW_SIZE = -1; int Elasticity_Kee2023::MATERIAL_TYPE = -1; int Elasticity_Kee2023::MAX_ITER = -1; int Elasticity_Kee2023::MAX_ERROR = -1; int Elasticity_Kee2023::MAX_ITER_CG = -1; int Elasticity_Kee2023::TOL_CG = -1; int Elasticity_Kee2023::MAX_LS_ITER = -1; int Elasticity_Kee2023::LS_ARMIJO_PARAM = -1; int Elasticity_Kee2023::LS_BETA = -1; int Elasticity_Kee2023::USE_LINE_SEARCH = -1; int Elasticity_Kee2023::ENUM_SOLVER_NEWTON = -1; int Elasticity_Kee2023::ENUM_SOLVER_LBFGS = -1; int Elasticity_Kee2023::ENUM_MATERIAL_STABLE_NEOHOOKEAN = -1; int Elasticity_Kee2023::ENUM_MATERIAL_COROTATED = -1; Elasticity_Kee2023::Elasticity_Kee2023(FluidModel *model) : NonPressureForceBase(model) { const unsigned int numParticles = model->numActiveParticles(); m_restVolumes.resize(numParticles); m_current_to_initial_index.resize(numParticles); m_initial_to_current_index.resize(numParticles); m_initialNeighbors.resize(numParticles); m_rotations.resize(numParticles, Matrix3r::Identity()); m_stress.resize(numParticles); m_fixedGroupId.resize(numParticles, 0); m_L.resize(numParticles); // kernel gradient correction matrix L m_F.resize(numParticles); // deformation gradient m_PL.resize(numParticles); // stores the rotation matrix times the matrix L m_youngsModulus = static_cast(5000000.0); m_poissonRatio = static_cast(0.45); m_alpha = 0.0; m_maxNeighbors = -1; m_fixedBoxMin.setZero(); m_fixedBoxMax.setZero(); m_fixedBox2Min.setZero(); m_fixedBox2Max.setZero(); m_solverType = 1; m_lbfgsWindowSize = 5; m_materialType = 1; // Co-rotated m_maxIter = 100; m_maxError = static_cast(1e-6); m_maxIterCG = 100; m_tolCG = static_cast(1e-4); m_maxLSIter = 20; m_lsArmijoParam = static_cast(1e-4); m_lsBeta = static_cast(0.5); m_useLineSearch = true; model->addField({ "rest volume", METHOD_NAME, FieldType::Scalar, [&](const unsigned int i) -> Real* { return &m_restVolumes[i]; }, true }); model->addField({ "rotation", METHOD_NAME, FieldType::Matrix3, [&](const unsigned int i) -> Real* { return &m_rotations[i](0,0); } }); model->addField({ "stress", METHOD_NAME, FieldType::Scalar, [&](const unsigned int i) -> Real* { return &m_stress[i]; } }); model->addField({ "deformation gradient", METHOD_NAME, FieldType::Matrix3, [&](const unsigned int i) -> Real* { return &m_F[i](0,0); } }); model->addField({ "correction matrix", METHOD_NAME, FieldType::Matrix3, [&](const unsigned int i) -> Real* { return &m_L[i](0,0); } }); } Elasticity_Kee2023::~Elasticity_Kee2023(void) { m_model->removeFieldByName("rest volume"); m_model->removeFieldByName("rotation"); m_model->removeFieldByName("stress"); m_model->removeFieldByName("deformation gradient"); m_model->removeFieldByName("correction matrix"); for (auto objIndex = 0; objIndex < m_objects.size(); objIndex++) { delete m_objects[objIndex]; } m_objects.clear(); } void Elasticity_Kee2023::deferredInit() { initValues(); } void Elasticity_Kee2023::initParameters() { NonPressureForceBase::initParameters(); ParameterBase::GetFunc getFctYM = [&]()-> Real { return m_youngsModulus; }; ParameterBase::SetFunc setFctYM = [&](Real val) { m_youngsModulus = val; m_mu = m_youngsModulus / (static_cast(2.0) * (static_cast(1.0) + m_poissonRatio)); m_lambda = m_youngsModulus * m_poissonRatio / ((static_cast(1.0) + m_poissonRatio) * (static_cast(1.0) - static_cast(2.0) * m_poissonRatio)); if (Simulation::getCurrent()->isSimulationInitialized()) // if Young's modulus has changed, recompute the factorization Simulation::getCurrent()->reset(); }; YOUNGS_MODULUS = createNumericParameter("youngsModulus", "Young`s modulus", getFctYM, setFctYM); setGroup(YOUNGS_MODULUS, "Fluid Model|Elasticity"); setDescription(YOUNGS_MODULUS, "Stiffness of the elastic material"); RealParameter* rparam = static_cast(getParameter(YOUNGS_MODULUS)); rparam->setMinValue(0.0); ParameterBase::GetFunc getFctPR = [&]()-> Real { return m_poissonRatio; }; ParameterBase::SetFunc setFctPR = [&](Real val) { m_poissonRatio = val; m_mu = m_youngsModulus / (static_cast(2.0) * (static_cast(1.0) + m_poissonRatio)); m_lambda = m_youngsModulus * m_poissonRatio / ((static_cast(1.0) + m_poissonRatio) * (static_cast(1.0) - static_cast(2.0) * m_poissonRatio)); if (Simulation::getCurrent()->isSimulationInitialized()) // if Poisson ration has changed, recompute the factorization Simulation::getCurrent()->reset(); }; POISSON_RATIO = createNumericParameter("poissonsRatio", "Poisson`s ratio", getFctPR, setFctPR); setGroup(POISSON_RATIO, "Fluid Model|Elasticity"); setDescription(POISSON_RATIO, "Ratio of transversal expansion and axial compression"); rparam = static_cast(getParameter(POISSON_RATIO)); rparam->setMinValue(static_cast(-1.0 + 1e-4)); rparam->setMaxValue(static_cast(0.5 - 1e-4)); ParameterBase::GetFunc getFctAlpha = [&]()-> Real { return m_alpha; }; ParameterBase::SetFunc setFctAlpha = [&](Real val) { m_alpha = val; if (Simulation::getCurrent()->isSimulationInitialized()) // if value has changed, recompute the factorization Simulation::getCurrent()->reset(); }; ALPHA = createNumericParameter("alpha", "Zero-energy modes suppression", getFctAlpha, setFctAlpha); setGroup(ALPHA, "Fluid Model|Elasticity"); setDescription(ALPHA, "Coefficent for zero-energy modes suppression method"); rparam = static_cast(getParameter(ALPHA)); rparam->setMinValue(0.0); ParameterBase::GetFunc getFct5 = [&]()-> int { return m_maxNeighbors; }; ParameterBase::SetFunc setFct5 = [&](int val) { m_maxNeighbors = val; if (Simulation::getCurrent()->isSimulationInitialized()) // if value has changed, recompute the factorization Simulation::getCurrent()->reset(); }; MAX_NEIGHBORS = createNumericParameter("maxNeighbors", "Max. neighbors", getFct5, setFct5); setGroup(MAX_NEIGHBORS, "Fluid Model|Elasticity"); setDescription(MAX_NEIGHBORS, "Maximum number of neighbors that are considered."); ParameterBase::GetVecFunc getFctFMin = [&]()-> Real* { return m_fixedBoxMin.data(); }; ParameterBase::SetVecFunc setFctFMin = [&](Real* val) { m_fixedBoxMin = Vector3r(val[0], val[1], val[2]); determineFixedParticles(); }; FIXED_BOX_MIN = createVectorParameter("fixedBoxMin", "Fixed box min", 3u, getFctFMin, setFctFMin); setGroup(FIXED_BOX_MIN, "Fluid Model|Elasticity"); setDescription(FIXED_BOX_MIN, "Minimum point of box of which contains the fixed particles."); getParameter(FIXED_BOX_MIN)->setReadOnly(true); ParameterBase::GetVecFunc getFctFMax = [&]()-> Real* { return m_fixedBoxMax.data(); }; ParameterBase::SetVecFunc setFctFMax = [&](Real* val) { m_fixedBoxMax = Vector3r(val[0], val[1], val[2]); determineFixedParticles(); }; FIXED_BOX_MAX = createVectorParameter("fixedBoxMax", "Fixed box max", 3u, getFctFMax, setFctFMax); setGroup(FIXED_BOX_MAX, "Fluid Model|Elasticity"); setDescription(FIXED_BOX_MAX, "Maximum point of box of which contains the fixed particles."); getParameter(FIXED_BOX_MAX)->setReadOnly(true); ParameterBase::GetVecFunc getFctF2Min = [&]()-> Real* { return m_fixedBox2Min.data(); }; ParameterBase::SetVecFunc setFctF2Min = [&](Real* val) { m_fixedBox2Min = Vector3r(val[0], val[1], val[2]); determineFixedParticles(); }; FIXED_BOX2_MIN = createVectorParameter("fixedBox2Min", "Fixed box 2 min", 3u, getFctF2Min, setFctF2Min); setGroup(FIXED_BOX2_MIN, "Fluid Model|Elasticity"); setDescription(FIXED_BOX2_MIN, "Minimum point of second box containing fixed particles."); getParameter(FIXED_BOX2_MIN)->setReadOnly(true); ParameterBase::GetVecFunc getFctF2Max = [&]()-> Real* { return m_fixedBox2Max.data(); }; ParameterBase::SetVecFunc setFctF2Max = [&](Real* val) { m_fixedBox2Max = Vector3r(val[0], val[1], val[2]); determineFixedParticles(); }; FIXED_BOX2_MAX = createVectorParameter("fixedBox2Max", "Fixed box 2 max", 3u, getFctF2Max, setFctF2Max); setGroup(FIXED_BOX2_MAX, "Fluid Model|Elasticity"); setDescription(FIXED_BOX2_MAX, "Maximum point of second box containing fixed particles."); getParameter(FIXED_BOX2_MAX)->setReadOnly(true); SOLVER_TYPE = createEnumParameter("solverType", "Solver type", &m_solverType); setGroup(SOLVER_TYPE, "Fluid Model|Elasticity"); setDescription(SOLVER_TYPE, "Solver used for the elasticity computation."); EnumParameter* enumParam = static_cast(getParameter(SOLVER_TYPE)); enumParam->addEnumValue("Newton", ENUM_SOLVER_NEWTON); enumParam->addEnumValue("L-BFGS", ENUM_SOLVER_LBFGS); LBFGS_WINDOW_SIZE = createNumericParameter("lbfgsWindowSize", "L-BFGS window size", &m_lbfgsWindowSize); setGroup(LBFGS_WINDOW_SIZE, "Fluid Model|Elasticity"); setDescription(LBFGS_WINDOW_SIZE, "Number of past iterations stored for L-BFGS approximation (only for L-BFGS)."); static_cast*>(getParameter(LBFGS_WINDOW_SIZE))->setMinValue(0); MAX_ITER = createNumericParameter("maxIterations", "Max. iterations", &m_maxIter); setGroup(MAX_ITER, "Fluid Model|Elasticity"); setDescription(MAX_ITER, "Maximum number of solver iterations per time step."); static_cast*>(getParameter(MAX_ITER))->setMinValue(1); MAX_ERROR = createNumericParameter("maxError", "Max. error", &m_maxError); setGroup(MAX_ERROR, "Fluid Model|Elasticity"); setDescription(MAX_ERROR, "Convergence threshold on infinity norm of position update."); rparam = static_cast(getParameter(MAX_ERROR)); rparam->setMinValue(static_cast(1e-15)); MAX_ITER_CG = createNumericParameter("maxIterationsCG", "Max. CG iterations", &m_maxIterCG); setGroup(MAX_ITER_CG, "Fluid Model|Elasticity"); setDescription(MAX_ITER_CG, "Maximum number of CG iterations for Newton linear solve."); static_cast*>(getParameter(MAX_ITER_CG))->setMinValue(1); TOL_CG = createNumericParameter("tolCG", "CG tolerance", &m_tolCG); setGroup(TOL_CG, "Fluid Model|Elasticity"); setDescription(TOL_CG, "Convergence tolerance for CG solver."); rparam = static_cast(getParameter(TOL_CG)); rparam->setMinValue(static_cast(1e-15)); MAX_LS_ITER = createNumericParameter("maxLSIterations", "Max. LS iterations", &m_maxLSIter); setGroup(MAX_LS_ITER, "Fluid Model|Elasticity"); setDescription(MAX_LS_ITER, "Maximum number of backtracking line search iterations."); static_cast*>(getParameter(MAX_LS_ITER))->setMinValue(1); LS_ARMIJO_PARAM = createNumericParameter("lsArmijoParam", "LS Armijo c1", &m_lsArmijoParam); setGroup(LS_ARMIJO_PARAM, "Fluid Model|Elasticity"); setDescription(LS_ARMIJO_PARAM, "Armijo sufficient decrease parameter for line search."); rparam = static_cast(getParameter(LS_ARMIJO_PARAM)); rparam->setMinValue(static_cast(1e-10)); rparam->setMaxValue(static_cast(0.5)); LS_BETA = createNumericParameter("lsBeta", "LS backtracking factor", &m_lsBeta); setGroup(LS_BETA, "Fluid Model|Elasticity"); setDescription(LS_BETA, "Step size reduction factor per line search iteration."); rparam = static_cast(getParameter(LS_BETA)); rparam->setMinValue(static_cast(0.01)); rparam->setMaxValue(static_cast(0.99)); USE_LINE_SEARCH = createBoolParameter("useLineSearch", "Use line search", &m_useLineSearch); setGroup(USE_LINE_SEARCH, "Fluid Model|Elasticity"); setDescription(USE_LINE_SEARCH, "Enable backtracking Armijo line search."); MATERIAL_TYPE = createEnumParameter("materialType", "Material type", &m_materialType); setGroup(MATERIAL_TYPE, "Fluid Model|Elasticity"); setDescription(MATERIAL_TYPE, "Constitutive model for the elastic material."); enumParam = static_cast(getParameter(MATERIAL_TYPE)); enumParam->addEnumValue("Stable Neo-Hookean", ENUM_MATERIAL_STABLE_NEOHOOKEAN); enumParam->addEnumValue("Co-rotated", ENUM_MATERIAL_COROTATED); } void Elasticity_Kee2023::determineFixedParticles() { const unsigned int numParticles = m_model->numActiveParticles(); auto inBox = [](const Vector3r& x, const Vector3r& bmin, const Vector3r& bmax) -> bool { return (x[0] > bmin[0]) && (x[1] > bmin[1]) && (x[2] > bmin[2]) && (x[0] < bmax[0]) && (x[1] < bmax[1]) && (x[2] < bmax[2]); }; const bool hasBox1 = !m_fixedBoxMin.isZero() || !m_fixedBoxMax.isZero(); const bool hasBox2 = !m_fixedBox2Min.isZero() || !m_fixedBox2Max.isZero(); if (hasBox1 || hasBox2) { for (int i = 0; i < (int)numParticles; i++) { const Vector3r& x = m_model->getPosition0(i); if (hasBox1 && inBox(x, m_fixedBoxMin, m_fixedBoxMax)) { m_model->setParticleState(i, ParticleState::Fixed); m_fixedGroupId[i] = 1; } else if (hasBox2 && inBox(x, m_fixedBox2Min, m_fixedBox2Max)) { m_model->setParticleState(i, ParticleState::Fixed); m_fixedGroupId[i] = 2; } } } } std::string Elasticity_Kee2023::computeMD5(const unsigned int objIndex) { ElasticObject* obj = m_objects[objIndex]; auto& group = obj->m_particleIndices; auto numParticles = group.size(); std::vector tempN; tempN.resize(obj->m_particleIndices.size() * 2); for (size_t i = 0; i < numParticles; i++) { const unsigned int particleIndex = group[i]; const size_t numNeighbors = m_initialNeighbors[particleIndex].size(); tempN[2 * i] = static_cast(numNeighbors); tempN[2 * i + 1] = 0; for (auto j = 0; j < numNeighbors; j++) tempN[2 * i + 1] += m_initialNeighbors[particleIndex][j] - group[0]; } // compute MD5 checksum for all particle positions, this is used for the cache file MD5 context((unsigned char*)&tempN[0], static_cast(2 * numParticles * sizeof(unsigned int))); char* md5hex = context.hex_digest(); tempN.clear(); return std::string(md5hex); } void Elasticity_Kee2023::initValues() { Simulation *sim = Simulation::getCurrent(); sim->getNeighborhoodSearch()->find_neighbors(); FluidModel *model = m_model; const unsigned int numParticles = model->numActiveParticles(); const unsigned int fluidModelIndex = model->getPointSetIndex(); m_totalNeighbors = 0u; // Store the neighbors in the reference configurations and // compute the volume of each particle in rest state #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { m_current_to_initial_index[i] = i; m_initial_to_current_index[i] = i; // reset particle state if (m_model->getParticleState(i) == ParticleState::Fixed) m_model->setParticleState(i, ParticleState::Active); // only neighbors in same phase will influence elasticity unsigned int numNeighbors = sim->numberOfNeighbors(fluidModelIndex, fluidModelIndex, i); m_initialNeighbors[i].resize(numNeighbors); for (unsigned int j = 0; j < numNeighbors; j++) m_initialNeighbors[i][j] = sim->getNeighbor(fluidModelIndex, fluidModelIndex, i, j); // if maxNeighbors is set, then sort all neighbors wrt. to their distance to xi // and only take the maxNeighbors next ones. if (m_maxNeighbors > 0) { struct Comparator { Comparator(const Vector3r& xi, Vector3r* x) : m_xi(xi), m_x(x) {}; bool operator()(unsigned int a, unsigned int b) { return (m_x[a] - m_xi).squaredNorm() < (m_x[b] - m_xi).squaredNorm(); } Vector3r m_xi; Vector3r* m_x; }; // sort the neighbors according to their distance std::sort(m_initialNeighbors[i].begin(), m_initialNeighbors[i].end(), Comparator(model->getPosition0(i), &model->getPosition0(0))); // take only the next maxNeighbors if (m_initialNeighbors[i].size() > m_maxNeighbors) { numNeighbors = m_maxNeighbors; m_initialNeighbors[i].resize(m_maxNeighbors); } } m_totalNeighbors += numNeighbors; m_rotations[i].setIdentity(); m_stress[i] = 0.0; m_F[i].setIdentity(); m_PL[i].setIdentity(); } } // Symmetrize neighbor lists: if j∈N(i) but i∉N(j), add i to N(j) if (m_maxNeighbors > 0) { for (unsigned int i = 0; i < numParticles; i++) { for (size_t jn = 0; jn < m_initialNeighbors[i].size(); jn++) { unsigned int j = m_initialNeighbors[i][jn]; bool found = false; for (size_t kn = 0; kn < m_initialNeighbors[j].size(); kn++) { if (m_initialNeighbors[j][kn] == i) { found = true; break; } } if (!found) m_initialNeighbors[j].push_back(i); } } } // Check neighbor list symmetry { unsigned int maxActualNeighbors = 0; int asymmetricPairs = 0; for (unsigned int i = 0; i < numParticles; i++) { if (m_initialNeighbors[i].size() > maxActualNeighbors) maxActualNeighbors = (unsigned int)m_initialNeighbors[i].size(); for (size_t jn = 0; jn < m_initialNeighbors[i].size(); jn++) { unsigned int j = m_initialNeighbors[i][jn]; bool found = false; for (size_t kn = 0; kn < m_initialNeighbors[j].size(); kn++) { if (m_initialNeighbors[j][kn] == i) { found = true; break; } } if (!found) asymmetricPairs++; } } LOG_INFO << "Neighbor stats: maxNeighbors=" << maxActualNeighbors << ", asymmetricPairs=" << asymmetricPairs; } // Compute rest volumes using final (symmetrized) neighbor lists for (unsigned int i = 0; i < numParticles; i++) { Real density = model->getMass(i) * sim->W_zero(); const Vector3r &xi0 = model->getPosition0(i); for (size_t j = 0; j < m_initialNeighbors[i].size(); j++) { const unsigned int neighborIndex0 = m_initialNeighbors[i][j]; const Vector3r& xj0 = model->getPosition0(neighborIndex0); density += model->getMass(neighborIndex0) * sim->W(xi0 - xj0); } m_restVolumes[i] = model->getMass(i) / density; } // mark all particles in the bounding box as fixed determineFixedParticles(); // find separate objects START_TIMING("findObjects") findObjects(); STOP_TIMING_AVG; // if we find the same object, copy the neighborhood info size_t numObjects = m_objects.size(); auto& fluidInfos = sim->getFluidInfos(); for (auto objIndex = 1; objIndex < numObjects; objIndex++) { bool foundSameObj = false; int objIndex2; for (objIndex2 = 0; objIndex2 < objIndex; objIndex2++) { if (fluidInfos[objIndex].hasSameParticleSampling(fluidInfos[objIndex2])) { foundSameObj = true; break; } } if (foundSameObj) { ElasticObject* obj = m_objects[objIndex]; ElasticObject* obj0 = m_objects[objIndex2]; const std::vector& group = obj->m_particleIndices; const std::vector& group0 = obj0->m_particleIndices; int numParticles = (int)group.size(); int offset = group[0]; int offset0 = group0[0]; for (int i = 0; i < (int)numParticles; i++) { int particleIndex = group[i]; int particleIndex0 = group0[i]; const unsigned int i0 = m_current_to_initial_index[particleIndex]; const unsigned int i00 = m_current_to_initial_index[particleIndex0]; const size_t numNeighbors = m_initialNeighbors[i00].size(); m_initialNeighbors[i0].resize(numNeighbors); for (int j = 0; j < numNeighbors; j++) { m_initialNeighbors[i0][j] = m_initialNeighbors[i00][j] - offset0 + offset; } } } } // compute kernel gradient correction matrix START_TIMING("computeMatrixL") computeMatrixL(); STOP_TIMING_AVG; // init factorization START_TIMING("initSystem") initSystem(); STOP_TIMING_AVG; } void Elasticity_Kee2023::findObjects() { Simulation *sim = Simulation::getCurrent(); const unsigned int numParticles = m_model->numActiveParticles(); const unsigned int fluidModelIndex = m_model->getPointSetIndex(); FluidModel *model = m_model; m_objects.clear(); std::map obj2group; for (unsigned int i = 0; i < numParticles; i++) { const unsigned int objId = m_model->getObjectId(i); // search for object id in map if (obj2group.find(objId) == obj2group.end()) { // if id was not found, then we have a new object m_objects.push_back(new ElasticObject()); const unsigned int groupIndex = (unsigned int)(m_objects.size()-1); obj2group[objId] = groupIndex; const unsigned int i0 = m_current_to_initial_index[i]; m_objects[groupIndex]->m_particleIndices.push_back(i0); } else { // object already exists const unsigned int groupIndex = obj2group[objId]; const unsigned int i0 = m_current_to_initial_index[i]; m_objects[groupIndex]->m_particleIndices.push_back(i0); } } // For each object sort the particles so that all fixed particles are at the end of the list. // This is needed for the factorization to exclude fixed particles. for (size_t groupIndex = 0; groupIndex < m_objects.size(); groupIndex++) { struct Comparator { Comparator(Elasticity_Kee2023* _this) : m_this(_this) {}; bool operator()(unsigned int a, unsigned int b) { if ((m_this->m_model->getParticleState(a) != ParticleState::Active) && (m_this->m_model->getParticleState(b) == ParticleState::Active)) return false; else if ((m_this->m_model->getParticleState(a) == ParticleState::Active) && (m_this->m_model->getParticleState(b) != ParticleState::Active)) return true; else if ((m_this->m_model->getParticleState(a) != ParticleState::Active) && (m_this->m_model->getParticleState(b) != ParticleState::Active)) return a < b; else return a < b; } Elasticity_Kee2023* m_this; }; std::sort(m_objects[groupIndex]->m_particleIndices.begin(), m_objects[groupIndex]->m_particleIndices.end(), Comparator(this)); m_objects[groupIndex]->m_nFixed = 0; for (size_t i = 0; i < m_objects[groupIndex]->m_particleIndices.size(); i++) { if (m_model->getParticleState(m_objects[groupIndex]->m_particleIndices[i]) != ParticleState::Active) m_objects[groupIndex]->m_nFixed++; } LOG_INFO << "Object " << groupIndex << " - fixed particles: " << m_objects[groupIndex]->m_nFixed; } } void Elasticity_Kee2023::initSystem() { Simulation *sim = Simulation::getCurrent(); const unsigned int fluidModelIndex = m_model->getPointSetIndex(); const Real dt = TimeManager::getCurrent()->getTimeStepSize(); // Compute Lamé parameters m_mu = m_youngsModulus / (static_cast(2.0) * (static_cast(1.0) + m_poissonRatio)); m_lambda = m_youngsModulus * m_poissonRatio / ((static_cast(1.0) + m_poissonRatio) * (static_cast(1.0) - static_cast(2.0) * m_poissonRatio)); FluidModel *model = m_model; size_t numObjects = m_objects.size(); auto& fluidInfos = sim->getFluidInfos(); for (auto objIndex = 0; objIndex < numObjects; objIndex++) { ElasticObject* obj = m_objects[objIndex]; // compute MD5 check sum std::string md5 = computeMD5(objIndex); // check if object with same md5 already exists bool foundFactorization = false; for (size_t i = 0; i < objIndex; i++) { // reuse the factorization for all objects with the same particle sampling to reduce the memory consumption if (fluidInfos[objIndex].hasSameParticleSampling(fluidInfos[i])) { m_objects[objIndex]->m_factorization = m_objects[i]->m_factorization; foundFactorization = true; LOG_INFO << "Object " << objIndex << " is using the factorization of object " << i; break; } } // if no factorization was found, create a new one if (obj->m_factorization == nullptr) obj->m_factorization = std::make_shared(); // generate file name for cache file std::string baseName = Utilities::FileSystem::getFileName(fluidInfos[objIndex].samplesFile); std::string ext = Utilities::FileSystem::getFileExt(fluidInfos[objIndex].samplesFile); std::string cacheFileName = sim->getCachePath() + "/" + baseName + "_" + ext + "_" + std::to_string(fluidInfos[objIndex].mode) + "_" + md5 + "_" + Utilities::StringTools::real2String(dt) + "_" + Utilities::StringTools::real2String(m_mu) + "_" + Utilities::StringTools::real2String(m_alpha) + ".bin"; // Fluid block if (fluidInfos[objIndex].type == 0) { Vector3r diag = (fluidInfos[objIndex].box.max() - fluidInfos[objIndex].box.min()); cacheFileName = sim->getCachePath() + "/Block_" + Utilities::StringTools::real2String(diag.x()) + "_" + Utilities::StringTools::real2String(diag.y()) + "_" + Utilities::StringTools::real2String(diag.z()) + "_" + std::to_string(fluidInfos[objIndex].mode) + "_" + md5 + "_" + Utilities::StringTools::real2String(dt) + "_" + Utilities::StringTools::real2String(m_mu) + "_" + Utilities::StringTools::real2String(m_alpha) + ".bin"; } // check if cache file exists const bool foundCacheFile = Utilities::FileSystem::fileExists(cacheFileName); if (sim->getUseCache() && foundCacheFile) { // if factorization cannot be reused from another object and a cache file was found, load the cache file if (!foundFactorization) { LOG_INFO << "Read cached factorization: " << cacheFileName; BinaryFileReader binReader; binReader.openFile(cacheFileName); binReader.readSparseMatrix(obj->m_factorization->m_D); binReader.readSparseMatrix(obj->m_factorization->m_DT_K); binReader.read(obj->m_factorization->m_dt); binReader.read(obj->m_factorization->m_mu); binReader.readSparseMatrix(obj->m_factorization->m_matHTH); #ifdef USE_AVX delete obj->m_factorization->m_cholesky; obj->m_factorization->m_cholesky = new CholeskyAVXSolver(); obj->m_factorization->m_cholesky->load(binReader); #else binReader.readSparseMatrix(obj->m_factorization->m_matL); binReader.readSparseMatrix(obj->m_factorization->m_matLT); binReader.readMatrixX(obj->m_factorization->m_permInd); binReader.readMatrixX(obj->m_factorization->m_permInvInd); #endif binReader.closeFile(); } // init vectors int numParticles = (int)obj->m_particleIndices.size(); #ifdef USE_AVX obj->m_dx_avx.resize(numParticles - obj->m_nFixed); obj->m_f_avx.resize(3 * numParticles); obj->m_xk_avx.resize(numParticles); obj->m_xTilde_avx.resize(numParticles); #else obj->m_dx.resize(numParticles - obj->m_nFixed); obj->m_f.resize(3 * numParticles); obj->m_xk.resize(numParticles); obj->m_xTilde.resize(numParticles); #endif int nFree = numParticles - obj->m_nFixed; #ifdef USE_AVX obj->m_gradient_avx.resize(numParticles, Scalarf8(0.0f)); // L-BFGS buffers (Scalarf8 domain) obj->m_lbfgs_s_avx.resize(m_lbfgsWindowSize); obj->m_lbfgs_y_avx.resize(m_lbfgsWindowSize); for (int w = 0; w < m_lbfgsWindowSize; w++) { obj->m_lbfgs_s_avx[w].resize(nFree, Scalarf8(0.0f)); obj->m_lbfgs_y_avx[w].resize(nFree, Scalarf8(0.0f)); } obj->m_lbfgs_rho.resize(m_lbfgsWindowSize, 0); obj->m_lbfgs_alpha.resize(m_lbfgsWindowSize, 0); obj->m_lbfgs_last_sol_avx.resize(nFree, Scalarf8(0.0f)); obj->m_lbfgs_q_avx.resize(nFree, Scalarf8(0.0f)); obj->m_lbfgs_count = 0; // Newton CG workspace (Scalarf8, coord-packed) obj->m_pcg_r_avx.resize(nFree, Scalarf8(0.0f)); obj->m_pcg_p_avx.resize(nFree, Scalarf8(0.0f)); obj->m_pcg_Ap_avx.resize(nFree, Scalarf8(0.0f)); obj->m_pcg_z_avx.resize(nFree, Scalarf8(0.0f)); #else obj->m_dx_perm.resize(numParticles - obj->m_nFixed); obj->m_gradient.resize(numParticles, Vector3r::Zero()); // L-BFGS buffers obj->m_lbfgs_s.resize(m_lbfgsWindowSize); obj->m_lbfgs_y.resize(m_lbfgsWindowSize); for (int w = 0; w < m_lbfgsWindowSize; w++) { obj->m_lbfgs_s[w].resize(nFree, Vector3r::Zero()); obj->m_lbfgs_y[w].resize(nFree, Vector3r::Zero()); } obj->m_lbfgs_rho.resize(m_lbfgsWindowSize, 0); obj->m_lbfgs_alpha.resize(m_lbfgsWindowSize, 0); obj->m_lbfgs_last_sol.resize(nFree, Vector3r::Zero()); obj->m_lbfgs_q.resize(nFree, Vector3r::Zero()); obj->m_lbfgs_count = 0; // Newton buffers (scalar PCG workspace) obj->m_pcg_r.resize(nFree, Vector3r::Zero()); obj->m_pcg_p.resize(nFree, Vector3r::Zero()); obj->m_pcg_Ap.resize(nFree, Vector3r::Zero()); obj->m_pcg_z.resize(nFree, Vector3r::Zero()); #endif // Shared Newton buffers (both builds) obj->m_hessian9x9.resize(numParticles); obj->m_pcg_precond.resize(nFree, Matrix3r::Identity()); } else // no cache found { ElasticObject* obj = m_objects[objIndex]; // compute new factorization if no factorization can be reused if (!foundFactorization) initFactorization(obj->m_factorization, obj->m_particleIndices, obj->m_nFixed, dt, m_mu); // init vectors int numParticles = (int)obj->m_particleIndices.size(); #ifdef USE_AVX obj->m_dx_avx.resize(numParticles - obj->m_nFixed); obj->m_f_avx.resize(3 * numParticles); obj->m_xk_avx.resize(numParticles); obj->m_xTilde_avx.resize(numParticles); #else obj->m_dx.resize(numParticles - obj->m_nFixed); obj->m_f.resize(3 * numParticles); obj->m_xk.resize(numParticles); obj->m_xTilde.resize(numParticles); #endif int nFree = numParticles - obj->m_nFixed; #ifdef USE_AVX obj->m_gradient_avx.resize(numParticles, Scalarf8(0.0f)); // L-BFGS buffers (Scalarf8 domain) obj->m_lbfgs_s_avx.resize(m_lbfgsWindowSize); obj->m_lbfgs_y_avx.resize(m_lbfgsWindowSize); for (int w = 0; w < m_lbfgsWindowSize; w++) { obj->m_lbfgs_s_avx[w].resize(nFree, Scalarf8(0.0f)); obj->m_lbfgs_y_avx[w].resize(nFree, Scalarf8(0.0f)); } obj->m_lbfgs_rho.resize(m_lbfgsWindowSize, 0); obj->m_lbfgs_alpha.resize(m_lbfgsWindowSize, 0); obj->m_lbfgs_last_sol_avx.resize(nFree, Scalarf8(0.0f)); obj->m_lbfgs_q_avx.resize(nFree, Scalarf8(0.0f)); obj->m_lbfgs_count = 0; // Newton CG workspace (Scalarf8, coord-packed) obj->m_pcg_r_avx.resize(nFree, Scalarf8(0.0f)); obj->m_pcg_p_avx.resize(nFree, Scalarf8(0.0f)); obj->m_pcg_Ap_avx.resize(nFree, Scalarf8(0.0f)); obj->m_pcg_z_avx.resize(nFree, Scalarf8(0.0f)); #else obj->m_dx_perm.resize(numParticles - obj->m_nFixed); obj->m_gradient.resize(numParticles, Vector3r::Zero()); // L-BFGS buffers obj->m_lbfgs_s.resize(m_lbfgsWindowSize); obj->m_lbfgs_y.resize(m_lbfgsWindowSize); for (int w = 0; w < m_lbfgsWindowSize; w++) { obj->m_lbfgs_s[w].resize(nFree, Vector3r::Zero()); obj->m_lbfgs_y[w].resize(nFree, Vector3r::Zero()); } obj->m_lbfgs_rho.resize(m_lbfgsWindowSize, 0); obj->m_lbfgs_alpha.resize(m_lbfgsWindowSize, 0); obj->m_lbfgs_last_sol.resize(nFree, Vector3r::Zero()); obj->m_lbfgs_q.resize(nFree, Vector3r::Zero()); obj->m_lbfgs_count = 0; // Newton buffers (scalar PCG workspace) obj->m_pcg_r.resize(nFree, Vector3r::Zero()); obj->m_pcg_p.resize(nFree, Vector3r::Zero()); obj->m_pcg_Ap.resize(nFree, Vector3r::Zero()); obj->m_pcg_z.resize(nFree, Vector3r::Zero()); #endif // Shared Newton buffers (both builds) obj->m_hessian9x9.resize(numParticles); obj->m_pcg_precond.resize(nFree, Matrix3r::Identity()); // write cache file if (sim->getUseCache() && (Utilities::FileSystem::makeDir(sim->getCachePath()) == 0)) { BinaryFileWriter binWriter; binWriter.openFile(cacheFileName); binWriter.writeSparseMatrix(obj->m_factorization->m_D); binWriter.writeSparseMatrix(obj->m_factorization->m_DT_K); binWriter.write(obj->m_factorization->m_dt); binWriter.write(obj->m_factorization->m_mu); binWriter.writeSparseMatrix(obj->m_factorization->m_matHTH); #ifdef USE_AVX obj->m_factorization->m_cholesky->save(binWriter); #else binWriter.writeSparseMatrix(obj->m_factorization->m_matL); binWriter.writeSparseMatrix(obj->m_factorization->m_matLT); binWriter.writeMatrixX(obj->m_factorization->m_permInd); binWriter.writeMatrixX(obj->m_factorization->m_permInvInd); #endif binWriter.closeFile(); } } obj->m_md5 = md5; } } void Elasticity_Kee2023::initFactorization(std::shared_ptr factorization, std::vector& particleIndices, const unsigned int nFixed, const Real dt, const Real mu) { Simulation* sim = Simulation::getCurrent(); const unsigned int fluidModelIndex = m_model->getPointSetIndex(); factorization->m_dt = dt; factorization->m_mu = mu; // init mapping to find the particle indices in the current particle group (object) std::vector groupInv; groupInv.resize(m_model->numActiveParticles()); int numParticles = (int)particleIndices.size(); std::vector& group = particleIndices; for (int i = 0; i < numParticles; i++) groupInv[group[i]] = i; // determine total number of neighbors int totalNeighbors = 0; for (int i = 0; i < (int)numParticles; i++) { int particleIndex = group[i]; const int numNeighbors = (int) m_initialNeighbors[i].size(); totalNeighbors += numNeighbors; } // init triplets for matrices D, K, M std::vector> triplets_D; std::vector> triplets_K; std::vector> triplets_M; triplets_D.reserve(2 * 3 * totalNeighbors); triplets_K.reserve(3 * numParticles); triplets_M.reserve(numParticles); std::vector> triplets_H; triplets_H.reserve(totalNeighbors + numParticles); std::vector> triplets_K2; triplets_K2.reserve(totalNeighbors); const double dtd = dt; const double mud = mu; const double alphad = m_alpha; // init matrices D and K unsigned int row_index = 0; for (int i = 0; i < (int)numParticles; i++) { int particleIndex0 = group[i]; const unsigned int i0 = particleIndex0; unsigned int particleIndex = m_initial_to_current_index[i0]; const double restVolumes_id = m_restVolumes[i]; // init matrix K: constant approximation (2*mu + lambda) for L-BFGS H_0 const double lambdad = static_cast(m_lambda); const double Kreal = (2.0 * mud + lambdad) * dtd * dtd * restVolumes_id; // init mass matrix // all particles have the same mass => M = mass*I triplets_M.push_back(Eigen::Triplet(i, i, m_model->getMass(particleIndex))); for (int j = 0; j < 3; j++) triplets_K.push_back(Eigen::Triplet(3 * i + j, 3 * i + j, Kreal)); const Eigen::Vector3d xi0d = m_model->getPosition0(i0).cast(); const size_t numNeighbors = m_initialNeighbors[i0].size(); for (unsigned int j = 0; j < numNeighbors; j++) { const unsigned int neighborIndex0 = m_initialNeighbors[i0][j]; const unsigned int neighborIndex = m_initial_to_current_index[neighborIndex0]; const Eigen::Vector3d xj0d = m_model->getPosition0(neighborIndex0).cast(); const Eigen::Vector3d correctedKernelGradient = m_L[particleIndex].cast() * sim->gradW((xi0d - xj0d).cast()).cast(); const double restVolumes_jd = m_restVolumes[neighborIndex]; const Eigen::Vector3d y = restVolumes_jd * correctedKernelGradient; // init matrix D according to Eq. 3 and 4 in the supplemental document of the paper triplets_D.push_back(Eigen::Triplet(3 * i + 0, i, -y[0])); triplets_D.push_back(Eigen::Triplet(3 * i + 1, i, -y[1])); triplets_D.push_back(Eigen::Triplet(3 * i + 2, i, -y[2])); triplets_D.push_back(Eigen::Triplet(3 * i + 0, groupInv[neighborIndex0], y[0])); triplets_D.push_back(Eigen::Triplet(3 * i + 1, groupInv[neighborIndex0], y[1])); triplets_D.push_back(Eigen::Triplet(3 * i + 2, groupInv[neighborIndex0], y[2])); double sum = 0.0; const Eigen::Vector3d xi_xj_0 = xi0d - xj0d; const double xixj0_l2 = xi_xj_0.squaredNorm(); const double beta = 1.0; // init matrix \tilde K * dt^2 according to Eq. 2 in the paper const double Kreal2 = alphad * dtd * dtd * mud * static_cast(m_restVolumes[i]) * static_cast(m_restVolumes[neighborIndex]) * (sim->W(xi_xj_0.cast()) / xixj0_l2); triplets_K2.push_back(Eigen::Triplet(row_index, row_index, Kreal2)); // init matrix H according to Eq. 5, 6 and 7 in the supplemental document of the paper for (unsigned int k = 0; k < numNeighbors; k++) { const unsigned int kIndex0 = m_initialNeighbors[i0][k]; const unsigned int kIndex = m_initial_to_current_index[kIndex0]; const Eigen::Vector3d xk0d = m_model->getPosition0(kIndex0).cast(); const Eigen::Vector3d correctedKernelGradientd = m_L[particleIndex].cast() * sim->gradW((xi0d - xk0d).cast()).cast(); const double restVolumes_kd = m_restVolumes[kIndex]; const double H3j = beta * restVolumes_kd * correctedKernelGradientd.dot(xi0d - xj0d); triplets_H.push_back(Eigen::Triplet(row_index, groupInv[kIndex0], H3j)); sum -= H3j; if (k == j) { triplets_H.push_back(Eigen::Triplet(row_index, groupInv[kIndex0], beta)); } } triplets_H.push_back(Eigen::Triplet(row_index, i, sum - beta)); row_index++; } } Eigen::SparseMatrix D(3 * numParticles, numParticles); factorization->m_D.resize(3 * numParticles, numParticles); D.setFromTriplets(triplets_D.begin(), triplets_D.end()); factorization->m_D = D.cast(); //set matrices Eigen::SparseMatrix K(3 * numParticles, 3 * numParticles); // actually 2 * dt* dt * K K.setFromTriplets(triplets_K.begin(), triplets_K.end()); Eigen::SparseMatrix M(numParticles, numParticles); M.setFromTriplets(triplets_M.begin(), triplets_M.end()); Eigen::SparseMatrix DT_K(numParticles, 3 * numParticles); factorization->m_DT_K.resize(numParticles, 3 * numParticles); DT_K = D.transpose() * K; factorization->m_DT_K = DT_K.cast(); Eigen::SparseMatrix K2(totalNeighbors, totalNeighbors); K2.setFromTriplets(triplets_K2.begin(), triplets_K2.end()); Eigen::SparseMatrix H(totalNeighbors, numParticles); H.setFromTriplets(triplets_H.begin(), triplets_H.end()); Eigen::SparseMatrix HTH(numParticles, numParticles); factorization->m_matHTH.resize(numParticles, numParticles); HTH = H.transpose() * K2 * H; factorization->m_matHTH = HTH.cast(); LOG_INFO << "Non zero elements (H^T * K * H): " << factorization->m_matHTH.nonZeros(); Eigen::SparseMatrix DT_K_D = DT_K * D; Eigen::SparseMatrix M_plus_DT_K_D; // init linear system matrix according to Eq. 29 in the paper if (m_alpha != 0.0) M_plus_DT_K_D = (M + DT_K_D + HTH).block(0, 0, numParticles - nFixed, numParticles - nFixed); else // no zero energy mode control M_plus_DT_K_D = (M + DT_K_D).block(0, 0, numParticles - nFixed, numParticles - nFixed); M_plus_DT_K_D.makeCompressed(); #ifdef USE_AVX // compute factorization of the matrix delete factorization->m_cholesky; factorization->m_cholesky = new CholeskyAVXSolver(M_plus_DT_K_D); #else SolverLLT* solverLLT = new SolverLLT(); solverLLT->compute(M_plus_DT_K_D); if (solverLLT->info() != Eigen::Success) { LOG_ERR << "Cholesky decomposition failed."; LOG_ERR << solverLLT->info(); return; } factorization->m_permInd = solverLLT->permutationP().indices(); factorization->m_permInvInd = solverLLT->permutationPinv().indices(); factorization->m_matL = Eigen::SparseMatrix(solverLLT->matrixL().cast()); factorization->m_matLT = Eigen::SparseMatrix(solverLLT->matrixU().cast()); delete solverLLT; #endif LOG_INFO << "Non zero elements (A): " << M_plus_DT_K_D.nonZeros(); } void Elasticity_Kee2023::step() { // apply accelerations const unsigned int numParticles = m_model->numActiveParticles(); const Real dt = TimeManager::getCurrent()->getTimeStepSize(); #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { if (m_model->getParticleState(i) == ParticleState::Active) { Vector3r& vel = m_model->getVelocity(i); vel += dt * m_model->getAcceleration(i); m_model->getAcceleration(i).setZero(); } } } START_TIMING("Elasticity") stepElasticitySolver(); // elasticity solver using the factorization STOP_TIMING_AVG } void Elasticity_Kee2023::reset() { initValues(); } void Elasticity_Kee2023::performNeighborhoodSearchSort() { const unsigned int numPart = m_model->numActiveParticles(); if (numPart == 0) return; Simulation *sim = Simulation::getCurrent(); auto const& d = sim->getNeighborhoodSearch()->point_set(m_model->getPointSetIndex()); d.sort_field(&m_rotations[0]); d.sort_field(&m_current_to_initial_index[0]); d.sort_field(&m_L[0]); d.sort_field(&m_restVolumes[0]); for (unsigned int i = 0; i < numPart; i++) m_initial_to_current_index[m_current_to_initial_index[i]] = i; } void Elasticity_Kee2023::computeMatrixL() { Simulation *sim = Simulation::getCurrent(); const unsigned int numParticles = m_model->numActiveParticles(); const unsigned int fluidModelIndex = m_model->getPointSetIndex(); #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = m_current_to_initial_index[i]; const Vector3r &xi0 = m_model->getPosition0(i0); Matrix3r L; L.setZero(); const size_t numNeighbors = m_initialNeighbors[i0].size(); // Fluid for (unsigned int j = 0; j < numNeighbors; j++) { // get initial neighbor index considering the current particle order const unsigned int neighborIndex0 = m_initialNeighbors[i0][j]; const unsigned int neighborIndex = m_initial_to_current_index[neighborIndex0]; const Vector3r &xj0 = m_model->getPosition0(neighborIndex0); const Vector3r xj_xi_0 = xj0 - xi0; const Vector3r gradW = sim->gradW(xj_xi_0); // minus because gradW(xij0) == -gradW(xji0) L -= m_restVolumes[neighborIndex] * gradW * xj_xi_0.transpose(); } // add 1 to z-component. otherwise we get a singular matrix in 2D if (sim->is2DSimulation()) L(2, 2) = 1.0; bool invertible = false; L.computeInverseWithCheck(m_L[i], invertible, static_cast(1e-9)); if (!invertible) { LOG_INFO << "Matrix not invertible."; MathFunctions::pseudoInverse(L, m_L[i]); //m_L[i] = Matrix3r::Identity(); } } } } void Elasticity_Kee2023::precomputeValues() { Simulation* sim = Simulation::getCurrent(); const int numParticles = (int)m_model->numActiveParticles(); // Scalar format (always filled — used by Newton CG in both builds) m_precomputed_indices.clear(); m_precomp_V_gradW.clear(); m_precomputed_indices.resize(numParticles); unsigned int sumNeighbors = 0; for (int i = 0; i < numParticles; i++) { const unsigned int i0 = m_current_to_initial_index[i]; const size_t numNeighbors = m_initialNeighbors[i0].size(); m_precomputed_indices[i] = sumNeighbors; sumNeighbors += numNeighbors; } m_precomp_V_gradW.resize(sumNeighbors); #pragma omp parallel for schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = m_current_to_initial_index[i]; const Vector3r xi0 = m_model->getPosition0(i0); const unsigned int numNeighbors = (unsigned int)m_initialNeighbors[i0].size(); unsigned int base = m_precomputed_indices[i]; for (unsigned int j = 0; j < numNeighbors; j++) { const unsigned int neighborIndex0 = m_initialNeighbors[i0][j]; const unsigned int neighborCurrent = m_initial_to_current_index[neighborIndex0]; const Vector3r xj0 = m_model->getPosition0(neighborIndex0); const Real V_j = m_restVolumes[neighborCurrent]; m_precomp_V_gradW[base + j] = V_j * sim->gradW(xi0 - xj0); } } #ifdef USE_AVX // AVX-packed format (additionally filled for L-BFGS force loops) m_precomputed_indices8.clear(); m_precomp_V_gradW8.clear(); m_precomputed_indices8.resize(numParticles); unsigned int sumBlocks = 0; for (int i = 0; i < numParticles; i++) { const unsigned int i0 = m_current_to_initial_index[i]; const size_t numNeighbors = m_initialNeighbors[i0].size(); m_precomputed_indices8[i] = sumBlocks; sumBlocks += (unsigned int)numNeighbors / 8u; if (numNeighbors % 8 != 0) sumBlocks++; } m_precomp_V_gradW8.resize(sumBlocks); #pragma omp parallel for schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = m_current_to_initial_index[i]; const Vector3r xi0 = m_model->getPosition0(i0); const unsigned int numNeighbors = (unsigned int)m_initialNeighbors[i0].size(); const Vector3f8 xi0_avx(xi0.cast()); unsigned int base8 = m_precomputed_indices8[i]; unsigned int idx = 0; for (unsigned int j = 0; j < numNeighbors; j += 8) { const unsigned int count = std::min(numNeighbors - j, 8u); unsigned int nIndices[8]; for (auto k = 0u; k < count; k++) nIndices[k] = m_initial_to_current_index[m_initialNeighbors[i0][j + k]]; const Scalarf8 Vj_avx = convert_zero(nIndices, &m_restVolumes[0], count); const Vector3f8 xj0_avx = convertVec_zero(&m_initialNeighbors[i0][j], &m_model->getPosition0(0), count); const Vector3f8 gradW_avx = CubicKernel_AVX::gradW(xi0_avx - xj0_avx); m_precomp_V_gradW8[base8 + idx] = gradW_avx * Vj_avx; idx++; } } #endif } void Elasticity_Kee2023::saveState(BinaryFileWriter &binWriter) { binWriter.writeBuffer((char*)m_current_to_initial_index.data(), m_current_to_initial_index.size() * sizeof(unsigned int)); binWriter.writeBuffer((char*)m_initial_to_current_index.data(), m_initial_to_current_index.size() * sizeof(unsigned int)); binWriter.writeBuffer((char*)m_L.data(), m_L.size() * sizeof(Matrix3r)); binWriter.writeBuffer((char*)m_rotations.data(), m_rotations.size() * sizeof(Matrix3r)); } void Elasticity_Kee2023::loadState(BinaryFileReader &binReader) { binReader.readBuffer((char*)m_current_to_initial_index.data(), m_current_to_initial_index.size() * sizeof(unsigned int)); binReader.readBuffer((char*)m_initial_to_current_index.data(), m_initial_to_current_index.size() * sizeof(unsigned int)); binReader.readBuffer((char*)m_L.data(), m_L.size() * sizeof(Matrix3r)); binReader.readBuffer((char*)m_rotations.data(), m_rotations.size() * sizeof(Matrix3r)); } void Elasticity_Kee2023::computeXTilde(ElasticObject* obj) { const std::vector& group = obj->m_particleIndices; const int numParticles = (int)group.size(); const Real dt = obj->m_factorization->m_dt; #ifdef USE_AVX auto& xTilde = obj->m_xTilde_avx; #else auto& xTilde = obj->m_xTilde; #endif #pragma omp parallel for schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Vector3r xT = m_model->getPosition(particleIndex) + dt * m_model->getVelocity(particleIndex); #ifdef USE_AVX xTilde[i] = Scalarf8((float)xT[0], (float)xT[1], (float)xT[2], 0.0f, 0.0f, 0.0f, 0.0f, 0.0f); #else xTilde[i] = xT; #endif } } void Elasticity_Kee2023::updateVelocity(ElasticObject* obj, Real fdt) { const std::vector& group = obj->m_particleIndices; const int numParticles = (int)group.size(); #ifdef USE_AVX const auto& xk = obj->m_xk_avx; #else const auto& xk = obj->m_xk; #endif const Real damping = static_cast(0.0); const Real invFdt = (1 - damping) / fdt; #pragma omp parallel for schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; if (m_model->getParticleState(particleIndex) == ParticleState::Active) { const Vector3r& x = m_model->getPosition(particleIndex); #ifdef USE_AVX float v[8]; xk[i].store(v); const Vector3r xk_i((Real)v[0], (Real)v[1], (Real)v[2]); m_model->getVelocity(particleIndex) = invFdt * (xk_i - x); #else m_model->getVelocity(particleIndex) = invFdt * (xk[i] - x); #endif } } } #ifdef USE_AVX Real Elasticity_Kee2023::computeEnergy(ElasticObject* obj) { const std::vector& group = obj->m_particleIndices; int numParticles = (int)group.size(); auto& D = obj->m_factorization->m_D; auto& HT_K_H = obj->m_factorization->m_matHTH; auto& f = obj->m_f_avx; // Scalarf8 (3*numParticles entries) auto& xk = obj->m_xk_avx; // Scalarf8 (numParticles entries) auto& xTilde = obj->m_xTilde_avx; const Real fdt = obj->m_factorization->m_dt; Real elasticEnergy = 0; Real massEnergy = 0; // F = D * xk (coord-packed AVX sparse matvec) #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < 3 * numParticles; i++) f[i].setZero(); #pragma omp for schedule(static) for (int k = 0; k < D.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(D, k); it; ++it) f[it.row()] += Scalarf8((float)it.value()) * xk[it.col()]; } // Elastic energy: sum V_i * Psi(F_i) #pragma omp for reduction(+:elasticEnergy) schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; float x0[8], x1[8], x2[8]; f[3 * i].store(x0); f[3 * i + 1].store(x1); f[3 * i + 2].store(x2); Matrix3r Fi; Fi << x0[0], x0[1], x0[2], x1[0], x1[1], x1[2], x2[0], x2[1], x2[2]; Eigen::JacobiSVD svd_i(Fi, Eigen::ComputeFullU | Eigen::ComputeFullV); Matrix3r U = svd_i.matrixU(); if ((U * svd_i.matrixV().transpose()).determinant() < 0) U.col(2) = -U.col(2); Matrix3r Ri = U * svd_i.matrixV().transpose(); elasticEnergy += m_restVolumes[particleIndex] * computePsi(Fi, Ri); } } // Inertial energy: (1/2) m_i ||xk_i - xTilde_i||^2 // Vertical accumulation: each thread keeps a Scalarf8 acc, reduces once at the end. #pragma omp parallel reduction(+:massEnergy) { Scalarf8 acc; acc.setZero(); #pragma omp for schedule(static) nowait for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Real mass = m_model->getMass(particleIndex); const Scalarf8 diff = xk[i] - xTilde[i]; acc += Scalarf8(static_cast(0.5 * mass)) * diff * diff; } massEnergy += (Real)acc.reduce(); } // Zero-energy mode: (1/2) xk^T HTH xk Real zeroEnergy = 0; if (m_alpha != 0.0) { #pragma omp parallel for reduction(+:zeroEnergy) schedule(static) for (int k = 0; k < HT_K_H.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(HT_K_H, k); it; ++it) zeroEnergy += it.value() * (Real)(xk[it.row()] * xk[it.col()]).reduce(); } zeroEnergy *= static_cast(0.5); } return massEnergy + fdt * fdt * elasticEnergy + zeroEnergy; } #else Real Elasticity_Kee2023::computeEnergy(ElasticObject* obj) { const std::vector& group = obj->m_particleIndices; int numParticles = (int)group.size(); auto& D = obj->m_factorization->m_D; auto& HT_K_H = obj->m_factorization->m_matHTH; auto& f = obj->m_f; auto& xk = obj->m_xk; const Real fdt = obj->m_factorization->m_dt; Real elasticEnergy = 0; Real massEnergy = 0; // F = D * xk #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { f[3 * i].setZero(); f[3 * i + 1].setZero(); f[3 * i + 2].setZero(); } #pragma omp for schedule(static) for (int k = 0; k < D.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(D, k); it; ++it) f[it.row()] += it.value() * xk[it.col()]; } // Elastic energy: sum V_i * Psi(F_i) #pragma omp for reduction(+:elasticEnergy) schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; Matrix3r Fi; Fi << f[3 * i][0], f[3 * i][1], f[3 * i][2], f[3 * i + 1][0], f[3 * i + 1][1], f[3 * i + 1][2], f[3 * i + 2][0], f[3 * i + 2][1], f[3 * i + 2][2]; Eigen::JacobiSVD svd_i(Fi, Eigen::ComputeFullU | Eigen::ComputeFullV); Matrix3r U = svd_i.matrixU(); if ((U * svd_i.matrixV().transpose()).determinant() < 0) U.col(2) = -U.col(2); Matrix3r Ri = U * svd_i.matrixV().transpose(); elasticEnergy += m_restVolumes[particleIndex] * computePsi(Fi, Ri); } } // Inertial energy: (1/2) m_i ||xk_i - xTilde_i||^2 auto& xTilde = obj->m_xTilde; #pragma omp parallel for reduction(+:massEnergy) schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Real mass = m_model->getMass(particleIndex); massEnergy += static_cast(0.5) * mass * (xk[i] - xTilde[i]).squaredNorm(); } // Zero-energy mode: (1/2) xk^T HTH xk Real zeroEnergy = 0; if (m_alpha != 0.0) { #pragma omp parallel for reduction(+:zeroEnergy) schedule(static) for (int k = 0; k < HT_K_H.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(HT_K_H, k); it; ++it) zeroEnergy += it.value() * xk[it.row()].dot(xk[it.col()]); } zeroEnergy *= static_cast(0.5); } return massEnergy + fdt * fdt * elasticEnergy + zeroEnergy; } #endif #ifdef USE_AVX Real Elasticity_Kee2023::computeEnergyAndGradient(ElasticObject* obj) { Simulation *sim = Simulation::getCurrent(); const std::vector& group = obj->m_particleIndices; int numParticles = (int)group.size(); auto& D = obj->m_factorization->m_D; auto& HT_K_H = obj->m_factorization->m_matHTH; auto& gradient = obj->m_gradient_avx; // Scalarf8 auto& f = obj->m_f_avx; // Scalarf8 (3*numParticles) auto& xk = obj->m_xk_avx; // Scalarf8 auto& xTilde = obj->m_xTilde_avx; // Scalarf8 const Real fdt = obj->m_factorization->m_dt; Real elasticEnergy = 0; Real massEnergy = 0; // 1. F = D * xk (coord-packed AVX sparse matvec, Kugelstadt Pattern A) #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < 3 * numParticles; i++) f[i].setZero(); #pragma omp for schedule(static) for (int k = 0; k < D.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(D, k); it; ++it) f[it.row()] += Scalarf8((float)it.value()) * xk[it.col()]; } #pragma omp for schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; float x0[8], x1[8], x2[8]; f[3 * i].store(x0); f[3 * i + 1].store(x1); f[3 * i + 2].store(x2); m_F[particleIndex] << x0[0], x0[1], x0[2], x1[0], x1[1], x1[2], x2[0], x2[1], x2[2]; } } // 2. Polar decomposition via SVD (scalar — no AVX SVD available). // Precompute P^T * L for each particle (stored in m_PL). // Accumulate elastic energy density Psi(F_i) * V_i. #pragma omp parallel default(shared) { #pragma omp for reduction(+:elasticEnergy) schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; Eigen::JacobiSVD svd_i(m_F[particleIndex], Eigen::ComputeFullU | Eigen::ComputeFullV); Matrix3r U = svd_i.matrixU(); if ((U * svd_i.matrixV().transpose()).determinant() < 0) U.col(2) = -U.col(2); m_rotations[particleIndex] = U * svd_i.matrixV().transpose(); const Matrix3r P_i = computeP(m_F[particleIndex], m_rotations[particleIndex]); m_PL[particleIndex] = P_i.transpose() * m_L[particleIndex]; elasticEnergy += m_restVolumes[particleIndex] * computePsi(m_F[particleIndex], m_rotations[particleIndex]); gradient[i].setZero(); } } // 3. Elastic gradient (scalar neighbor loop; write Scalarf8 gradient). // g_i = -dt^2 * V_i * sum_j V_j * (P_i^T L_i + P_j^T L_j) * gradW #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Vector3r xi0 = m_model->getPosition0(i0); const Real V_i = m_restVolumes[particleIndex]; const Matrix3r& PtL_i = m_PL[particleIndex]; const unsigned int numNeighbors = (unsigned int)m_initialNeighbors[i0].size(); const Matrix3f8 PtLi_avx(m_PL[particleIndex].cast()); Vector3f8 force_avx; force_avx.setZero(); for (unsigned int j = 0; j < numNeighbors; j += 8) { const unsigned int count = std::min(numNeighbors - j, 8u); unsigned int nIndices[8]; for (auto k = 0u; k < count; k++) nIndices[k] = m_initial_to_current_index[m_initialNeighbors[i0][j + k]]; const Matrix3f8 PtLj_avx = convertMat_zero(nIndices, &m_PL[0], count); const Vector3f8& V_gradW = m_precomp_V_gradW8[m_precomputed_indices8[particleIndex] + j / 8]; force_avx += PtLi_avx * V_gradW + PtLj_avx * V_gradW; } Vector3r force; force[0] = force_avx.x().reduce(); force[1] = force_avx.y().reduce(); force[2] = force_avx.z().reduce(); const Vector3r g = -(fdt * fdt * V_i * force); gradient[i] = Scalarf8((float)g[0], (float)g[1], (float)g[2], 0.0f, 0.0f, 0.0f, 0.0f, 0.0f); } } // 4. Mass term: g_i += m_i * (xk_i - xTilde_i) // Vertical accumulation for massEnergy (one .reduce() per thread). #pragma omp parallel reduction(+:massEnergy) { Scalarf8 acc; acc.setZero(); #pragma omp for schedule(static) nowait for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Real mass = m_model->getMass(particleIndex); const Scalarf8 diff = xk[i] - xTilde[i]; gradient[i] += Scalarf8((float)mass) * diff; acc += Scalarf8(static_cast(0.5 * mass)) * diff * diff; } massEnergy += (Real)acc.reduce(); } // 5. Zero-energy mode: g += H^T * K2 * H * xk (coord-packed AVX sparse matvec) Real zeroEnergy = 0; if (m_alpha != 0.0) { #pragma omp parallel default(shared) { #pragma omp for reduction(+:zeroEnergy) schedule(static) for (int k = 0; k < HT_K_H.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(HT_K_H, k); it; ++it) { zeroEnergy += it.value() * (Real)(xk[it.row()] * xk[it.col()]).reduce(); gradient[it.col()] += Scalarf8((float)it.value()) * xk[it.row()]; } } } zeroEnergy *= static_cast(0.5); } return massEnergy + fdt * fdt * elasticEnergy + zeroEnergy; } #else Real Elasticity_Kee2023::computeEnergyAndGradient(ElasticObject* obj) { Simulation *sim = Simulation::getCurrent(); const std::vector& group = obj->m_particleIndices; int numParticles = (int)group.size(); auto& D = obj->m_factorization->m_D; auto& HT_K_H = obj->m_factorization->m_matHTH; auto& gradient = obj->m_gradient; auto& f = obj->m_f; auto& xk = obj->m_xk; const Real fdt = obj->m_factorization->m_dt; Real elasticEnergy = 0; Real massEnergy = 0; // 1. Compute deformation gradient: F = D * xk -> m_F #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < numParticles; i++) { f[3 * i].setZero(); f[3 * i + 1].setZero(); f[3 * i + 2].setZero(); } #pragma omp for schedule(static) for (int k = 0; k < D.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(D, k); it; ++it) f[it.row()] += it.value() * xk[it.col()]; } #pragma omp for schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; m_F[particleIndex] << f[3 * i][0], f[3 * i][1], f[3 * i][2], f[3 * i + 1][0], f[3 * i + 1][1], f[3 * i + 1][2], f[3 * i + 2][0], f[3 * i + 2][1], f[3 * i + 2][2]; } } // 2. Extract rotation R from F via polar decomposition // Precompute P^T * L for each particle (stored in m_PL) // Accumulate elastic energy density Psi(F_i) * V_i #pragma omp parallel default(shared) { #pragma omp for reduction(+:elasticEnergy) schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; Eigen::JacobiSVD svd_i(m_F[particleIndex], Eigen::ComputeFullU | Eigen::ComputeFullV); Matrix3r U = svd_i.matrixU(); if ((U * svd_i.matrixV().transpose()).determinant() < 0) U.col(2) = -U.col(2); m_rotations[particleIndex] = U * svd_i.matrixV().transpose(); const Matrix3r P_i = computeP(m_F[particleIndex], m_rotations[particleIndex]); m_PL[particleIndex] = P_i.transpose() * m_L[particleIndex]; elasticEnergy += m_restVolumes[particleIndex] * computePsi(m_F[particleIndex], m_rotations[particleIndex]); gradient[i].setZero(); } } // 3. Elastic gradient: g_i = -dt^2 * V_i * sum_j V_j * (P_i^T L_i + P_j^T L_j) * gradW #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Vector3r xi0 = m_model->getPosition0(i0); const Real V_i = m_restVolumes[particleIndex]; const Matrix3r& PtL_i = m_PL[particleIndex]; const size_t numNeighbors = m_initialNeighbors[i0].size(); Vector3r force; force.setZero(); for (unsigned int j = 0; j < numNeighbors; j++) { const unsigned int neighborIndex0 = m_initialNeighbors[i0][j]; const unsigned int neighborCurrent = m_initial_to_current_index[neighborIndex0]; const Matrix3r& PtL_j = m_PL[neighborCurrent]; force += (PtL_i + PtL_j) * m_precomp_V_gradW[m_precomputed_indices[particleIndex] + j]; } gradient[i] = -(fdt * fdt * V_i * force); } } // 4. Mass term: g_i += m_i * (xk_i - xTilde_i) auto& xTilde = obj->m_xTilde; #pragma omp parallel default(shared) { #pragma omp for reduction(+:massEnergy) schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Real mass = m_model->getMass(particleIndex); gradient[i] += mass * (xk[i] - xTilde[i]); massEnergy += static_cast(0.5) * mass * (xk[i] - xTilde[i]).squaredNorm(); } } // 5. Zero-energy mode control: g += H^T * K2 * H * xk Real zeroEnergy = 0; std::vector> gradient_ze(numParticles, Vector3r::Zero()); if (m_alpha != 0.0) { #pragma omp parallel default(shared) { #pragma omp for reduction(+:zeroEnergy) schedule(static) for (int k = 0; k < HT_K_H.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(HT_K_H, k); it; ++it) { zeroEnergy += it.value() * xk[it.row()].dot(xk[it.col()]); gradient_ze[it.col()] += it.value() * xk[it.row()]; } } } zeroEnergy *= static_cast(0.5); for (int i = 0; i < (int)numParticles; i++) gradient[i] += gradient_ze[i]; } return massEnergy + fdt * fdt * elasticEnergy + zeroEnergy; } #endif void Elasticity_Kee2023::computeHessian(ElasticObject* obj) { if (m_materialType == ENUM_MATERIAL_STABLE_NEOHOOKEAN) computeStableNeoHookeanHessian9x9(obj); else // Co-rotated computeCorotatedHessian9x9(obj); } void Elasticity_Kee2023::computeCorotatedHessian9x9(ElasticObject* obj) { const std::vector& group = obj->m_particleIndices; const int numParticles = (int)group.size(); const Real dt = obj->m_factorization->m_dt; const Real dt2 = dt * dt; const Real sqrt2inv = static_cast(1.0) / std::sqrt(static_cast(2.0)); #pragma omp parallel for schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Matrix3r& Fi = m_F[particleIndex]; // SVD: F = U * Σ * V^T Eigen::JacobiSVD svd_h(Fi, Eigen::ComputeFullU | Eigen::ComputeFullV); Matrix3r U = svd_h.matrixU(); Matrix3r V = svd_h.matrixV(); Vector3r sigma = svd_h.singularValues(); // Ensure proper rotation (det = +1) if ((U * V.transpose()).determinant() < 0) { U.col(2) = -U.col(2); sigma[2] = -sigma[2]; } // ======== ARAP Hessian: H = Σₖ ⟨λₖ⟩₊ qₖqₖᵀ ======== // Eigenvalues: // twist modes: λ₀ = 1 - 2/(σ₀+σ₁), λ₁ = 1 - 2/(σ₁+σ₂), λ₂ = 1 - 2/(σ₀+σ₂) // flip/scaling: λ₃..λ₈ = 1 Real lambda[3]; lambda[0] = 1 - 2 / (sigma[0] + sigma[1]); lambda[1] = 1 - 2 / (sigma[1] + sigma[2]); lambda[2] = 1 - 2 / (sigma[0] + sigma[2]); // Clamp negative eigenvalues to 0 (PSD projection) for (int k = 0; k < 3; k++) if (lambda[k] < 0) lambda[k] = 0; // Eigenvectors as vec(uᵢvⱼᵀ ± uⱼvᵢᵀ) / sqrt(2) // Twist modes (skew-symmetric) Matrix3r T0 = sqrt2inv * (U.col(0) * V.col(1).transpose() - U.col(1) * V.col(0).transpose()); Matrix3r T1 = sqrt2inv * (U.col(1) * V.col(2).transpose() - U.col(2) * V.col(1).transpose()); Matrix3r T2 = sqrt2inv * (U.col(0) * V.col(2).transpose() - U.col(2) * V.col(0).transpose()); // Flip modes (symmetric off-diagonal) Matrix3r F0 = sqrt2inv * (U.col(0) * V.col(1).transpose() + U.col(1) * V.col(0).transpose()); Matrix3r F1 = sqrt2inv * (U.col(1) * V.col(2).transpose() + U.col(2) * V.col(1).transpose()); Matrix3r F2 = sqrt2inv * (U.col(0) * V.col(2).transpose() + U.col(2) * V.col(0).transpose()); // Scaling modes (diagonal) Matrix3r S0 = U.col(0) * V.col(0).transpose(); Matrix3r S1 = U.col(1) * V.col(1).transpose(); Matrix3r S2 = U.col(2) * V.col(2).transpose(); // Vectorize (column-major) auto vecMap = [](const Matrix3r& M) { Eigen::Matrix v; v << M(0,0), M(1,0), M(2,0), M(0,1), M(1,1), M(2,1), M(0,2), M(1,2), M(2,2); return v; }; Eigen::Matrix q[9]; q[0] = vecMap(T0); q[1] = vecMap(T1); q[2] = vecMap(T2); q[3] = vecMap(F0); q[4] = vecMap(F1); q[5] = vecMap(F2); q[6] = vecMap(S0); q[7] = vecMap(S1); q[8] = vecMap(S2); // Reconstruct ARAP: H = 2μ Σₖ λₖ qₖqₖᵀ (flip/scaling have λ=1) const Real mu2 = static_cast(2.0) * m_mu; Eigen::Matrix K = Eigen::Matrix::Zero(); for (int k = 0; k < 3; k++) K += (mu2 * lambda[k]) * q[k] * q[k].transpose(); for (int k = 3; k < 9; k++) K += mu2 * q[k] * q[k].transpose(); // ======== Volume Hessian: λ * vec(R) * vec(R)^T ======== // d²/dvecF² of (λ/2)(tr(R^T F) - 3)² = λ * R ⊗ R (always PSD) const Matrix3r Ri = U * V.transpose(); Eigen::Matrix vecR; vecR << Ri(0,0), Ri(1,0), Ri(2,0), Ri(0,1), Ri(1,1), Ri(2,1), Ri(0,2), Ri(1,2), Ri(2,2); K += m_lambda * vecR * vecR.transpose(); // Store unscaled material Hessian // The dt² * Vi scaling is applied in the symmetric matvec obj->m_hessian9x9[i] = K; } } void Elasticity_Kee2023::computeStableNeoHookeanHessian9x9(ElasticObject* obj) { const std::vector& group = obj->m_particleIndices; const int numParticles = (int)group.size(); const Real sqrt2inv = static_cast(1.0) / std::sqrt(static_cast(2.0)); const Real eps_tol = static_cast(1e-12); #pragma omp parallel for schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Matrix3r& Fi = m_F[particleIndex]; // SVD: F = U * Σ * V^T Eigen::JacobiSVD svd_h(Fi, Eigen::ComputeFullU | Eigen::ComputeFullV); Matrix3r U = svd_h.matrixU(); Matrix3r V = svd_h.matrixV(); Vector3r sigma = svd_h.singularValues(); // Ensure proper rotation (det = +1) if ((U * V.transpose()).determinant() < 0) { U.col(2) = -U.col(2); sigma[2] = -sigma[2]; } const Real s0 = sigma[0], s1 = sigma[1], s2 = sigma[2]; const Real I_C = s0*s0 + s1*s1 + s2*s2; const Real J = s0 * s1 * s2; // Scalars const Real IC1 = I_C + 1; const Real mu_T = m_mu * (1 - 1 / IC1); // Tikhonov coefficient const Real mu_F = 2 * m_mu / (IC1 * IC1); // F rank-1 coefficient const Real gamma_H = m_lambda * (J - 1) - static_cast(0.75) * m_mu; // volume coefficient // Vectorize helper auto vecMap = [](const Matrix3r& M) { Eigen::Matrix v; v << M(0,0), M(1,0), M(2,0), M(0,1), M(1,1), M(2,1), M(0,2), M(1,2), M(2,2); return v; }; // ======== Term 1: μ_T * I₉ (always PSD) ======== Eigen::Matrix K = mu_T * Eigen::Matrix::Identity(); // ======== Term 2: μ_F * vec(F) ⊗ vec(F) (rank-1, always PSD) ======== Eigen::Matrix vecF = vecMap(Fi); K += mu_F * vecF * vecF.transpose(); // ======== Term 3: λ * vec(cof) ⊗ vec(cof) (rank-1, always PSD) ======== Vector3r c0 = Fi.col(1).cross(Fi.col(2)); Vector3r c1 = Fi.col(2).cross(Fi.col(0)); Vector3r c2 = Fi.col(0).cross(Fi.col(1)); Eigen::Matrix vecCof; vecCof << c0[0], c0[1], c0[2], c1[0], c1[1], c1[2], c2[0], c2[1], c2[2]; K += m_lambda * vecCof * vecCof.transpose(); // ======== Term 4: γ_H * H_vol (volume Hessian with PSD projection) ======== // Same eigendecomposition as Co-rotated volume term // Group 1: 6 eigenvalues ±σᵢ Real volEig[6] = { s0, -s0, s1, -s1, s2, -s2 }; Matrix3r volD[6]; volD[0] << 0,0,0, 0,0,1, 0,-1,0; volD[1] << 0,0,0, 0,0,1, 0,1,0; volD[2] << 0,0,1, 0,0,0, -1,0,0; volD[3] << 0,0,1, 0,0,0, 1,0,0; volD[4] << 0,1,0, -1,0,0, 0,0,0; volD[5] << 0,1,0, 1,0,0, 0,0,0; for (int k = 0; k < 6; k++) { Real scaled = gamma_H * volEig[k]; if (scaled > 0) { Matrix3r Q = sqrt2inv * U * volD[k] * V.transpose(); Eigen::Matrix qv = vecMap(Q); K += scaled * qv * qv.transpose(); } } // Group 2: 3 eigenvalues from depressed cubic if (I_C > eps_tol) { const Real sqrtIC3 = std::sqrt(I_C / static_cast(3.0)); const Real cosArg = static_cast(3.0) * J / I_C * std::sqrt(static_cast(3.0) / I_C); const Real clampedCos = std::max(static_cast(-1.0), std::min(static_cast(1.0), cosArg)); const Real theta = std::acos(clampedCos); const Real pi = static_cast(3.14159265358979323846); for (int k = 0; k < 3; k++) { Real eps_k = static_cast(2.0) * sqrtIC3 * std::cos((theta + static_cast(2.0) * pi * k) / static_cast(3.0)); Real scaled = gamma_H * eps_k; if (scaled > 0) { Vector3r diag_k(s0*s2 + s1*eps_k, s1*s2 + s0*eps_k, eps_k*eps_k - s2*s2); Real norm_k = diag_k.norm(); if (norm_k > eps_tol) { diag_k /= norm_k; Matrix3r Q = U * diag_k.asDiagonal() * V.transpose(); Eigen::Matrix qv = vecMap(Q); K += scaled * qv * qv.transpose(); } } } } obj->m_hessian9x9[i] = K; } } void Elasticity_Kee2023::computeNewtonPreconditioner(ElasticObject* obj) { const std::vector& group = obj->m_particleIndices; const int numParticles = (int)group.size(); const int nFree = numParticles - (int)obj->m_nFixed; auto& D = obj->m_factorization->m_D; auto& HTH = obj->m_factorization->m_matHTH; auto& precond = obj->m_pcg_precond; // Initialize diagonal blocks with mass #pragma omp parallel for schedule(static) for (int j = 0; j < nFree; j++) { const unsigned int j0 = group[j]; const unsigned int pj = m_initial_to_current_index[j0]; const Real mj = m_model->getMass(pj); precond[j] = Matrix3r::Identity() * mj; } // Add HTH diagonal (zero-energy mode control) if (m_alpha != static_cast(0.0)) { for (int outer = 0; outer < HTH.outerSize(); outer++) for (Eigen::SparseMatrix::InnerIterator it(HTH, outer); it; ++it) { const int j = (int)it.row(); if (j >= nFree) continue; if (it.row() == it.col()) // diagonal only { for (int b = 0; b < 3; b++) precond[j](b, b) += it.value(); } } } // Add elastic contribution: for each particle i, accumulate dt² * V_i * d_j^T K_i d_j to the j-th diagonal block // K_i is the unscaled material Hessian - dt² * V_i scaling is applied here const Real dt = obj->m_factorization->m_dt; const Real dt2 = dt * dt; for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Real V_i = m_restVolumes[particleIndex]; const Real scale = dt2 * V_i; // Scale factor for this particle's contribution const Eigen::Matrix& Ki = obj->m_hessian9x9[i]; // Gather D values for particle i int nCols = 0; for (Eigen::SparseMatrix::InnerIterator it(D, 3 * i); it; ++it) nCols++; std::vector cols(nCols); std::vector dv0(nCols), dv1(nCols), dv2(nCols); int idx = 0; for (Eigen::SparseMatrix::InnerIterator it(D, 3 * i); it; ++it) { cols[idx] = (int)it.col(); dv0[idx] = it.value(); idx++; } idx = 0; for (Eigen::SparseMatrix::InnerIterator it(D, 3 * i + 1); it; ++it) { dv1[idx] = it.value(); idx++; } idx = 0; for (Eigen::SparseMatrix::InnerIterator it(D, 3 * i + 2); it; ++it) { dv2[idx] = it.value(); idx++; } // For each neighbor j, compute contribution to precond[j] for (int ji = 0; ji < nCols; ji++) { const int j = cols[ji]; if (j >= nFree) continue; // d_j is a 9-vector: d_j[3b+a] = D[3i+b, j] * delta(a matches column index) // Simplified: d_j^T K_i d_j is a 3x3 block const Real dv[3] = { dv0[ji], dv1[ji], dv2[ji] }; // Compute 3x3 block: dt² * V_i * Σ_{b,c} dv[b] * K_i[3b+a, 3c+a'] * dv[c] for each (a,a') for (int a = 0; a < 3; a++) for (int ap = 0; ap < 3; ap++) { Real val = 0; for (int b = 0; b < 3; b++) for (int c = 0; c < 3; c++) val += dv[b] * Ki(3 * b + a, 3 * c + ap) * dv[c]; precond[j](a, ap) += scale * val; } } } // Invert each 3x3 block #pragma omp parallel for schedule(static) for (int j = 0; j < nFree; j++) precond[j] = precond[j].inverse(); } #ifdef USE_AVX void Elasticity_Kee2023::newtonMatvec(ElasticObject* obj) { const std::vector& group = obj->m_particleIndices; const int numParticles = (int)group.size(); const int nFree = numParticles - (int)obj->m_nFixed; auto& p_avx = obj->m_pcg_p_avx; auto& Ap_avx = obj->m_pcg_Ap_avx; const Real dt = obj->m_factorization->m_dt; const Real dt2 = dt * dt; auto& D = obj->m_factorization->m_D; auto& HTH = obj->m_factorization->m_matHTH; auto& f_avx = obj->m_f_avx; // Step 1: Ap_avx = M * p_avx #pragma omp parallel for schedule(static) for (int j = 0; j < nFree; j++) { const unsigned int j0 = group[j]; const unsigned int pj = m_initial_to_current_index[j0]; Ap_avx[j] = Scalarf8((float)m_model->getMass(pj)) * p_avx[j]; } // Step 2: Ap_avx += HTH * p_avx if (m_alpha != static_cast(0.0)) { #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int k = 0; k < HTH.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(HTH, k); it; ++it) { const int row = (int)it.row(); const int col = (int)it.col(); if (row < nFree && col < nFree) Ap_avx[col] = Ap_avx[col] + Scalarf8((float)it.value()) * p_avx[row]; } } } } // Step 3: Dx = D * p (coord-packed AVX sparse matvec) #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < 3 * numParticles; i++) f_avx[i].setZero(); #pragma omp for schedule(static) for (int k = 0; k < D.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(D, k); it; ++it) { const int col = (int)it.col(); if (col < nFree) f_avx[it.row()] = f_avx[it.row()] + Scalarf8((float)it.value()) * p_avx[col]; } } } // Step 4: K*Dx → dPtL (scalar — 9×9 Hessian multiply) const unsigned int numActiveParticles = m_model->numActiveParticles(); std::vector> dPtL(numActiveParticles, Matrix3r::Zero()); #pragma omp parallel for schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; float x0[8], x1[8], x2[8]; f_avx[3 * i].store(x0); f_avx[3 * i + 1].store(x1); f_avx[3 * i + 2].store(x2); Eigen::Matrix Dxi; Dxi << (Real)x0[0], (Real)x1[0], (Real)x2[0], (Real)x0[1], (Real)x1[1], (Real)x2[1], (Real)x0[2], (Real)x1[2], (Real)x2[2]; Eigen::Matrix KDx_vec = obj->m_hessian9x9[i] * Dxi; Matrix3r dP; dP.col(0) = KDx_vec.segment<3>(0); dP.col(1) = KDx_vec.segment<3>(3); dP.col(2) = KDx_vec.segment<3>(6); dPtL[particleIndex] = dP.transpose() * m_L[particleIndex]; } // Step 5: Force loop (AVX gather with precomputed V_gradW8), accumulate into Ap_avx #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const float V_i = (float)m_restVolumes[particleIndex]; const unsigned int numNeighbors = (unsigned int)m_initialNeighbors[i0].size(); const Matrix3f8 dPtLi_avx(dPtL[particleIndex].cast()); Vector3f8 force_avx; force_avx.setZero(); for (unsigned int j = 0; j < numNeighbors; j += 8) { const unsigned int count = std::min(numNeighbors - j, 8u); unsigned int nIndices[8]; for (auto k = 0u; k < count; k++) nIndices[k] = m_initial_to_current_index[m_initialNeighbors[i0][j + k]]; const Matrix3f8 dPtLj_avx = convertMat_zero(nIndices, &dPtL[0], count); const Vector3f8& V_gradW = m_precomp_V_gradW8[m_precomputed_indices8[particleIndex] + j / 8]; force_avx += dPtLi_avx * V_gradW + dPtLj_avx * V_gradW; } const Scalarf8 fx(force_avx.x().reduce(), force_avx.y().reduce(), force_avx.z().reduce(), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f); Ap_avx[i] = Ap_avx[i] - Scalarf8((float)dt2 * V_i) * fx; } } #else void Elasticity_Kee2023::newtonMatvec(ElasticObject* obj) { const std::vector& group = obj->m_particleIndices; const int numParticles = (int)group.size(); const int nFree = numParticles - (int)obj->m_nFixed; auto& x = obj->m_pcg_p; auto& Ax = obj->m_pcg_Ap; const Real dt = obj->m_factorization->m_dt; const Real dt2 = dt * dt; auto& D = obj->m_factorization->m_D; auto& HTH = obj->m_factorization->m_matHTH; // Initialize Ax = M*x #pragma omp parallel for schedule(static) for (int j = 0; j < nFree; j++) { const unsigned int j0 = group[j]; const unsigned int pj = m_initial_to_current_index[j0]; const Real mj = m_model->getMass(pj); Ax[j] = mj * x[j]; } // Add H^T*K_ze*H*x (zero-energy mode control) // Col-write pattern: Ax[col] += val * x[row] (HTH is symmetric, avoids data race) if (m_alpha != static_cast(0.0)) { #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int k = 0; k < HTH.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(HTH, k); it; ++it) { const int row = (int)it.row(); const int col = (int)it.col(); if (row < nFree && col < nFree) Ax[col] += it.value() * x[row]; } } } } // D * x std::vector> Dx(numParticles); #pragma omp parallel for schedule(static) for (int i = 0; i < numParticles; i++) { Eigen::Matrix yi = Eigen::Matrix::Zero(); for (int a = 0; a < 3; a++) { for (Eigen::SparseMatrix::InnerIterator it(D, 3 * i + a); it; ++it) { const int j = (int)it.col(); if (j < nFree) { for (int b = 0; b < 3; b++) yi(3 * b + a) += it.value() * x[j][b]; } } } Dx[i] = yi; } // K * Dx → dPtL const unsigned int numActiveParticles = m_model->numActiveParticles(); std::vector> dPtL(numActiveParticles, Matrix3r::Zero()); #pragma omp parallel for schedule(static) for (int i = 0; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; Eigen::Matrix KDx_vec = obj->m_hessian9x9[i] * Dx[i]; Matrix3r dP; dP.col(0) = KDx_vec.segment<3>(0); dP.col(1) = KDx_vec.segment<3>(3); dP.col(2) = KDx_vec.segment<3>(6); dPtL[particleIndex] = dP.transpose() * m_L[particleIndex]; } // Force loop Simulation* sim = Simulation::getCurrent(); #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Vector3r xi0 = m_model->getPosition0(i0); const Real V_i = m_restVolumes[particleIndex]; const Matrix3r& dPtL_i = dPtL[particleIndex]; const size_t numNeighbors = m_initialNeighbors[i0].size(); Vector3r force; force.setZero(); for (unsigned int jn = 0; jn < numNeighbors; jn++) { const unsigned int j0 = m_initialNeighbors[i0][jn]; const unsigned int jCurrent = m_initial_to_current_index[j0]; const Matrix3r& dPtL_j = dPtL[jCurrent]; force += (dPtL_i + dPtL_j) * m_precomp_V_gradW[m_precomputed_indices[particleIndex] + jn]; } Ax[i] -= dt2 * V_i * force; } } #endif #ifdef USE_AVX int Elasticity_Kee2023::matFreePCG(ElasticObject* obj) { computeNewtonPreconditioner(obj); const int nFree = (int)obj->m_dx_avx.size(); auto& gradient = obj->m_gradient_avx; auto& dx = obj->m_dx_avx; auto& r = obj->m_pcg_r_avx; auto& p = obj->m_pcg_p_avx; auto& Ap = obj->m_pcg_Ap_avx; auto& z = obj->m_pcg_z_avx; auto& precond = obj->m_pcg_precond; // Initialize: x_0 = 0, r_0 = b = -gradient #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) { dx[i].setZero(); r[i] = Scalarf8(0.0f) - gradient[i]; } // z_0 = M^{-1} r_0 (apply block-diagonal preconditioner) #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) { float rv[8]; r[i].store(rv); Vector3r ri((Real)rv[0], (Real)rv[1], (Real)rv[2]); Vector3r zi = precond[i] * ri; z[i] = Scalarf8((float)zi[0], (float)zi[1], (float)zi[2], 0.0f, 0.0f, 0.0f, 0.0f, 0.0f); } // p_0 = z_0 #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) p[i] = z[i]; // rz_old = r_0^T z_0 (dot product via Scalarf8: (r*z).reduce() sums x*x + y*y + z*z + 0+...) Real rz_old = 0; #pragma omp parallel for reduction(+:rz_old) schedule(static) for (int i = 0; i < nFree; i++) rz_old += (Real)(r[i] * z[i]).reduce(); // Compute initial residual norm for convergence check Real r0_norm = 0; #pragma omp parallel for reduction(+:r0_norm) schedule(static) for (int i = 0; i < nFree; i++) r0_norm += (Real)(r[i] * r[i]).reduce(); r0_norm = std::sqrt(r0_norm); const Real tol = m_tolCG * r0_norm; int iter = 0; for (; iter < m_maxIterCG; iter++) { // Ap = A * p newtonMatvec(obj); // alpha = rz_old / (p^T Ap) Real pAp = 0; #pragma omp parallel for reduction(+:pAp) schedule(static) for (int i = 0; i < nFree; i++) pAp += (Real)(p[i] * Ap[i]).reduce(); if (pAp <= static_cast(0)) { LOG_ERR << "PCG: pAp <= 0 at iter " << iter << ", pAp = " << pAp << " (matrix not positive definite)"; break; } const Scalarf8 alpha_avx((float)(rz_old / pAp)); // x_{k+1} = x_k + alpha * p_k // r_{k+1} = r_k - alpha * Ap #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) { dx[i] = dx[i] + alpha_avx * p[i]; r[i] = r[i] - alpha_avx * Ap[i]; } // Check convergence Real r_norm = 0; #pragma omp parallel for reduction(+:r_norm) schedule(static) for (int i = 0; i < nFree; i++) r_norm += (Real)(r[i] * r[i]).reduce(); r_norm = std::sqrt(r_norm); if (r_norm < tol) { iter++; // count this iteration break; } // z_{k+1} = M^{-1} r_{k+1} (apply block-diagonal preconditioner) #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) { float rv[8]; r[i].store(rv); Vector3r ri((Real)rv[0], (Real)rv[1], (Real)rv[2]); Vector3r zi = precond[i] * ri; z[i] = Scalarf8((float)zi[0], (float)zi[1], (float)zi[2], 0.0f, 0.0f, 0.0f, 0.0f, 0.0f); } // rz_new = r_{k+1}^T z_{k+1} Real rz_new = 0; #pragma omp parallel for reduction(+:rz_new) schedule(static) for (int i = 0; i < nFree; i++) rz_new += (Real)(r[i] * z[i]).reduce(); // beta = rz_new / rz_old const Scalarf8 beta_avx((float)(rz_new / rz_old)); // p_{k+1} = z_{k+1} + beta * p_k #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) p[i] = z[i] + beta_avx * p[i]; rz_old = rz_new; } return iter; } #else int Elasticity_Kee2023::matFreePCG(ElasticObject* obj) { computeNewtonPreconditioner(obj); const int nFree = (int)obj->m_dx.size(); auto& gradient = obj->m_gradient; auto& dx = obj->m_dx; auto& r = obj->m_pcg_r; auto& p = obj->m_pcg_p; auto& Ap = obj->m_pcg_Ap; auto& z = obj->m_pcg_z; auto& precond = obj->m_pcg_precond; // Initialize: x_0 = 0, r_0 = b = -gradient #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) { dx[i].setZero(); r[i] = -gradient[i]; } // z_0 = M^{-1} r_0 (apply block-diagonal preconditioner) #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) z[i] = precond[i] * r[i]; // p_0 = z_0 #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) p[i] = z[i]; // rz_old = r_0^T z_0 Real rz_old = 0; #pragma omp parallel for reduction(+:rz_old) schedule(static) for (int i = 0; i < nFree; i++) rz_old += r[i].dot(z[i]); // Compute initial residual norm for convergence check Real r0_norm = 0; #pragma omp parallel for reduction(+:r0_norm) schedule(static) for (int i = 0; i < nFree; i++) r0_norm += r[i].squaredNorm(); r0_norm = std::sqrt(r0_norm); const Real tol = m_tolCG * r0_norm; int iter = 0; for (; iter < m_maxIterCG; iter++) { // Ap = A * p newtonMatvec(obj); // alpha = rz_old / (p^T Ap) Real pAp = 0; #pragma omp parallel for reduction(+:pAp) schedule(static) for (int i = 0; i < nFree; i++) pAp += p[i].dot(Ap[i]); if (pAp <= static_cast(0)) { LOG_ERR << "PCG: pAp <= 0 at iter " << iter << ", pAp = " << pAp << " (matrix not positive definite)"; break; } const Real alpha = rz_old / pAp; // x_{k+1} = x_k + alpha * p_k // r_{k+1} = r_k - alpha * Ap #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) { dx[i] += alpha * p[i]; r[i] -= alpha * Ap[i]; } // Check convergence Real r_norm = 0; #pragma omp parallel for reduction(+:r_norm) schedule(static) for (int i = 0; i < nFree; i++) r_norm += r[i].squaredNorm(); r_norm = std::sqrt(r_norm); if (r_norm < tol) { iter++; // count this iteration break; } // z_{k+1} = M^{-1} r_{k+1} (apply block-diagonal preconditioner) #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) z[i] = precond[i] * r[i]; // rz_new = r_{k+1}^T z_{k+1} Real rz_new = 0; #pragma omp parallel for reduction(+:rz_new) schedule(static) for (int i = 0; i < nFree; i++) rz_new += r[i].dot(z[i]); // beta = rz_new / rz_old const Real beta = rz_new / rz_old; // p_{k+1} = z_{k+1} + beta * p_k #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) p[i] = z[i] + beta * p[i]; rz_old = rz_new; } return iter; } #endif void Elasticity_Kee2023::prefactorizedLLTSolve(ElasticObject* obj) { #ifdef USE_AVX auto& dx = obj->m_dx_avx; #else auto& dx = obj->m_dx; #endif const int n = (int)dx.size(); #ifdef USE_AVX // dx is std::vector with 3 active lanes per element (x, y, z, 0, 0, 0, 0, 0). // CholeskyAVXSolver::solve takes float* with stride, so we pack/unpack at this boundary. VectorXr b(3 * n), x(3 * n); #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) { float vals[8]; dx[i].store(vals); b[3 * i] = vals[0]; b[3 * i + 1] = vals[1]; b[3 * i + 2] = vals[2]; } // Solve 3 independent n-systems (one per coordinate, stride 3) #pragma omp parallel for schedule(static) for (int c = 0; c < 3; c++) obj->m_factorization->m_cholesky->solve(&x[c], &b[c], 3); #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) dx[i] = Scalarf8(x[3 * i], x[3 * i + 1], x[3 * i + 2], 0.0f, 0.0f, 0.0f, 0.0f, 0.0f); #else auto& dx_perm = obj->m_dx_perm; auto& permInd = obj->m_factorization->m_permInd; auto& permInvInd = obj->m_factorization->m_permInvInd; auto& matL = obj->m_factorization->m_matL; auto& matLT = obj->m_factorization->m_matLT; // Permute dx (fill-in reduction ordering) #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < n; i++) dx_perm[permInd[i]] = dx[i]; } // Forward substitution: L * y = RHS_perm for (int k = 0; k < matL.outerSize(); ++k) for (Eigen::SparseMatrix::InnerIterator it(matL, k); it; ++it) if (it.row() == it.col()) dx_perm[it.row()] /= it.value(); else dx_perm[it.row()] -= it.value() * dx_perm[it.col()]; // Backward substitution: L^T * dx = y for (int k = (int) matLT.outerSize() - 1; k >= 0; --k) for (Eigen::SparseMatrix::ReverseInnerIterator it(matLT, k); it; --it) if (it.row() == it.col()) dx_perm[it.row()] /= it.value(); else dx_perm[it.row()] -= it.value() * dx_perm[it.col()]; // Inverse permutation #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < n; i++) dx[permInvInd[i]] = dx_perm[i]; } #endif } #ifdef USE_AVX Real Elasticity_Kee2023::newtonSolve(ElasticObject* obj, int& cgIter) { // Energy + gradient in AVX (fills gradient_avx, m_F, m_rotations, m_PL) Real energy = computeEnergyAndGradient(obj); // Hessian (scalar, reads m_F, writes m_hessian9x9) computeHessian(obj); // CG in AVX (reads gradient_avx, writes dx_avx) cgIter = matFreePCG(obj); return energy; } #else Real Elasticity_Kee2023::newtonSolve(ElasticObject* obj, int& cgIter) { Real energy = computeEnergyAndGradient(obj); // sets gradient, m_F, m_rotations computeHessian(obj); // computes 9×9 per particle, assembles 3N system cgIter = matFreePCG(obj); // solves 3N scalar system, writes solution into dx // DEBUG: check descent direction and Hessian eigenvalues { const auto& gradient = obj->m_gradient; const auto& dx = obj->m_dx; // dx const int n = (int)dx.size(); // Gradient norm Real gradNorm = 0; for (int i = 0; i < n; i++) gradNorm += gradient[i].squaredNorm(); gradNorm = std::sqrt(gradNorm); // Descent: g · dx (should be negative for descent) Real gDotDx = 0; for (int i = 0; i < n; i++) gDotDx += gradient[i].dot(dx[i]); // Sample 9x9 Hessian eigenvalues for particle 0 if (!obj->m_hessian9x9.empty()) { Eigen::SelfAdjointEigenSolver> eig(obj->m_hessian9x9[0]); const auto& evals = eig.eigenvalues(); std::cout << "DEBUG Newton: E=" << energy << " |g|=" << gradNorm << " g·dx=" << gDotDx << " H[0] eigs=[" << evals.transpose() << "]" << std::endl; } else { std::cout << "DEBUG Newton: E=" << energy << " |g|=" << gradNorm << " g·dx=" << gDotDx << " (no Hessian)" << std::endl; } } return energy; } #endif #ifdef USE_AVX Real Elasticity_Kee2023::lbfgsSolve(ElasticObject* obj) { auto& dx = obj->m_dx_avx; // Scalarf8 const int windowSize = m_lbfgsWindowSize; const int n = (int)dx.size(); int& count = obj->m_lbfgs_count; auto& lbfgs_s = obj->m_lbfgs_s_avx; // Scalarf8 secant history auto& lbfgs_y = obj->m_lbfgs_y_avx; // Scalarf8 secant history auto& lbfgs_rho = obj->m_lbfgs_rho; // Real auto& lbfgs_alpha = obj->m_lbfgs_alpha; // Real auto& last_sol = obj->m_lbfgs_last_sol_avx; // Scalarf8 auto& gradient = obj->m_gradient_avx; // Scalarf8 auto& q = obj->m_lbfgs_q_avx; // Scalarf8 // Save g_{k-1} into q before computeEnergyAndGradient overwrites gradient if (count > 0) { #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) q[i] = gradient[i]; } Real energy = computeEnergyAndGradient(obj); // gradient = g_k // Compute secant pairs from actual position/gradient differences if (count > 0 && windowSize > 0) { int slot = (count - 1) % windowSize; // s_{k-1} = x_k - x_{k-1} #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) lbfgs_s[slot][i] = obj->m_xk_avx[i] - last_sol[i]; // y_{k-1} = g_k - g_{k-1} #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) lbfgs_y[slot][i] = gradient[i] - q[i]; Real ys = 0; #pragma omp parallel reduction(+:ys) { Scalarf8 acc; acc.setZero(); #pragma omp for schedule(static) nowait for (int i = 0; i < n; i++) acc += lbfgs_y[slot][i] * lbfgs_s[slot][i]; ys += (Real)acc.reduce(); } lbfgs_rho[slot] = (std::abs(ys) > static_cast(1e-10)) ? static_cast(1.0) / ys : 0; } // Save current xk for next iteration, set q = g_k for two-loop #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) { q[i] = gradient[i]; last_sol[i] = obj->m_xk_avx[i]; } int nPairs = std::min(count, windowSize); // Backward loop (newest -> oldest secant pair) for (int j = 0; j < nPairs; j++) { int idx = ((count - 1 - j) % windowSize + windowSize) % windowSize; Real sq = 0; #pragma omp parallel reduction(+:sq) { Scalarf8 acc; acc.setZero(); #pragma omp for schedule(static) nowait for (int i = 0; i < n; i++) acc += lbfgs_s[idx][i] * q[i]; sq += (Real)acc.reduce(); } lbfgs_alpha[j] = lbfgs_rho[idx] * sq; const Scalarf8 alpha_avx((float)lbfgs_alpha[j]); #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) q[i] = q[i] - alpha_avx * lbfgs_y[idx][i]; } // Apply initial inverse Hessian: dx = H_0^-1 * q #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) dx[i] = q[i]; prefactorizedLLTSolve(obj); // Forward loop (oldest -> newest secant pair) for (int j = nPairs - 1; j >= 0; j--) { int idx = ((count - 1 - j) % windowSize + windowSize) % windowSize; Real yr = 0; #pragma omp parallel reduction(+:yr) { Scalarf8 acc; acc.setZero(); #pragma omp for schedule(static) nowait for (int i = 0; i < n; i++) acc += lbfgs_y[idx][i] * dx[i]; yr += (Real)acc.reduce(); } Real beta = lbfgs_rho[idx] * yr; const Scalarf8 ab_avx((float)(lbfgs_alpha[j] - beta)); #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) dx[i] = dx[i] + ab_avx * lbfgs_s[idx][i]; } // Search direction: dx = -H*g const Scalarf8 negOne(-1.0f); #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) dx[i] = negOne * dx[i]; count++; return energy; } #else Real Elasticity_Kee2023::lbfgsSolve(ElasticObject* obj) { auto& dx = obj->m_dx; const int windowSize = m_lbfgsWindowSize; const int n = (int)dx.size(); int& count = obj->m_lbfgs_count; auto& lbfgs_s = obj->m_lbfgs_s; auto& lbfgs_y = obj->m_lbfgs_y; auto& lbfgs_rho = obj->m_lbfgs_rho; auto& lbfgs_alpha = obj->m_lbfgs_alpha; auto& last_sol = obj->m_lbfgs_last_sol; auto& gradient = obj->m_gradient; auto& q = obj->m_lbfgs_q; // Save g_{k-1} into q before computeEnergyAndGradient overwrites gradient if (count > 0) { #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) q[i] = gradient[i]; } // Compute energy and gradient at current xk Real energy = computeEnergyAndGradient(obj); // gradient = g_k // Compute secant pairs from actual position/gradient differences if (count > 0 && windowSize > 0) { int slot = (count - 1) % windowSize; // s_{k-1} = x_k - x_{k-1} (actual step taken, includes line search alpha) #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) lbfgs_s[slot][i] = obj->m_xk[i] - last_sol[i]; // y_{k-1} = g_k - g_{k-1} (q holds g_{k-1}) #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) lbfgs_y[slot][i] = gradient[i] - q[i]; Real ys = 0; #pragma omp parallel for reduction(+:ys) schedule(static) for (int i = 0; i < n; i++) ys += lbfgs_y[slot][i].dot(lbfgs_s[slot][i]); lbfgs_rho[slot] = (std::abs(ys) > static_cast(1e-10)) ? static_cast(1.0) / ys : 0; } // Save current xk for next iteration, set q = g_k for two-loop #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) { q[i] = gradient[i]; last_sol[i] = obj->m_xk[i]; } // Number of usable secant pairs int nPairs = std::min(count, windowSize); // L-BFGS two-loop recursion // q contains g_k (gradient) // Backward loop (newest to oldest secant pair) for (int j = 0; j < nPairs; j++) { int idx = ((count - 1 - j) % windowSize + windowSize) % windowSize; Real sq = 0; #pragma omp parallel for reduction(+:sq) schedule(static) for (int i = 0; i < n; i++) sq += lbfgs_s[idx][i].dot(q[i]); lbfgs_alpha[j] = lbfgs_rho[idx] * sq; #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) q[i] -= lbfgs_alpha[j] * lbfgs_y[idx][i]; } // Apply initial inverse Hessian: r = A^{-1} * q #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) dx[i] = q[i]; prefactorizedLLTSolve(obj); // dx now contains r = H_0^{-1} * q // Forward loop (oldest to newest secant pair) for (int j = nPairs - 1; j >= 0; j--) { int idx = ((count - 1 - j) % windowSize + windowSize) % windowSize; Real yr = 0; #pragma omp parallel for reduction(+:yr) schedule(static) for (int i = 0; i < n; i++) yr += lbfgs_y[idx][i].dot(dx[i]); Real beta = lbfgs_rho[idx] * yr; #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) dx[i] += (lbfgs_alpha[j] - beta) * lbfgs_s[idx][i]; } // Search direction: dx = -H*g #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) dx[i] = -dx[i]; count++; return energy; } #endif #ifndef USE_AVX Real Elasticity_Kee2023::lineSearch(ElasticObject* obj, Real energy, int& lsIter) { lsIter = 0; auto& x = obj->m_xk; auto& dx = obj->m_dx; auto& gradient = obj->m_gradient; const int n = (int)dx.size(); // Directional derivative: g^T * dx (must be < 0 for descent) Real dirDeriv = 0; #pragma omp parallel for reduction(+:dirDeriv) schedule(static) for (int i = 0; i < n; i++) dirDeriv += gradient[i].dot(dx[i]); if (dirDeriv >= 0) LOG_WARN << "LS: dirDeriv=" << dirDeriv << " (not a descent direction)"; // Backup current xk std::vector> xk_backup(n); #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) xk_backup[i] = x[i]; Real alpha = static_cast(1.0); const Real eps = static_cast(1e-4); for (int ls = 0; ls < m_maxLSIter; ls++) { // Set trial point: xk = xk_backup + alpha * dx #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) x[i] = xk_backup[i] + alpha * dx[i]; Real trialEnergy = computeEnergy(obj); lsIter++; // Armijo: f(x + alpha*dx) <= f(x) + c1*alpha*(g^T*dx) if (trialEnergy <= energy + m_lsArmijoParam * alpha * dirDeriv) { #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) x[i] = xk_backup[i]; return alpha; } // Energy difference negligible — accept alpha if (std::abs(trialEnergy - energy) < eps) { #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) x[i] = xk_backup[i]; return alpha; } alpha *= m_lsBeta; } // LS exhausted LOG_WARN << "LS exhausted: dirDeriv=" << dirDeriv << " E0=" << energy; // Restore original position #pragma omp parallel for schedule(static) for (int i = 0; i < n; i++) x[i] = xk_backup[i]; return static_cast(0); } #endif #ifdef USE_AVX void Elasticity_Kee2023::stepElasticitySolver() { START_TIMING("Elasticity_Kee2023") const unsigned int numActiveParticles = m_model->numActiveParticles(); if (numActiveParticles == 0) return; precomputeValues(); // AVX build supports both Newton (solverType=0) and L-BFGS (solverType=1). size_t numObjects = m_objects.size(); for (auto objIndex = 0; objIndex < numObjects; objIndex++) { START_TIMING("objSolve") ElasticObject* obj = m_objects[objIndex]; auto& xk = obj->m_xk_avx; // Scalarf8 auto& dx = obj->m_dx_avx; // Scalarf8 const std::vector& group = obj->m_particleIndices; int numParticles = (int)group.size(); const Real fdt = obj->m_factorization->m_dt; // xTilde = x + dt*v (uses externally-set velocity, e.g. from AnimationField) computeXTilde(obj); // Fixed particles: DFSPH integrator skips them, so write xTilde back to model position // so they advance by the externally-prescribed velocity. const int nFixed = (int)obj->m_nFixed; const int firstFixed = numParticles - nFixed; for (int i = firstFixed; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; float v[8]; obj->m_xTilde_avx[i].store(v); m_model->getPosition(particleIndex) = Vector3r((Real)v[0], (Real)v[1], (Real)v[2]); } // xk = xTilde (initial iterate) #pragma omp parallel for schedule(static) for (int i = 0; i < numParticles; i++) xk[i] = obj->m_xTilde_avx[i]; const int maxIter = m_maxIter; const int nFree = (int)dx.size(); obj->m_lbfgs_count = 0; int totalCGIter = 0; int iter = 0; for (; iter < maxIter; iter++) { // Compute search direction (stored in dx_avx) Real energy; if (m_solverType == 0) // Newton { int cgIter; energy = newtonSolve(obj, cgIter); totalCGIter += cgIter; } else // L-BFGS { energy = lbfgsSolve(obj); } // Convergence check on ||dx||_inf (reduce from Scalarf8: max of lanes 0..2) Real dxNorm = 0; #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) { float v[8]; dx[i].store(v); const Real localMax = std::max(std::max(std::abs((Real)v[0]), std::abs((Real)v[1])), std::abs((Real)v[2])); #pragma omp critical if (localMax > dxNorm) dxNorm = localMax; } if (dxNorm < m_maxError) break; // xk += dx (step size fixed at 1 — no line search in AVX path) #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) xk[i] += dx[i]; } if (iter == maxIter) LOG_WARN << "Elasticity_Kee2023: not converged after " << maxIter << " iterations"; updateVelocity(obj, fdt); double elapsedMs = STOP_TIMING LOG_INFO << "STATS frame obj=" << objIndex << " solverIter=" << iter + 1 << " lsIter=0 elapsed_ms=" << elapsedMs; } STOP_TIMING_AVG } #else void Elasticity_Kee2023::stepElasticitySolver() { START_TIMING("Elasticity_Kee2023") const unsigned int numActiveParticles = m_model->numActiveParticles(); if (numActiveParticles == 0) return; precomputeValues(); size_t numObjects = m_objects.size(); for (auto objIndex = 0; objIndex < numObjects; objIndex++) { START_TIMING("objSolve") ElasticObject* obj = m_objects[objIndex]; auto& xk = obj->m_xk; auto& dx = obj->m_dx; const std::vector& group = obj->m_particleIndices; int numParticles = (int)group.size(); const Real fdt = obj->m_factorization->m_dt; // Initialize: compute x_tilde = x + dt * v (uses externally-set velocity, e.g. from AnimationField) computeXTilde(obj); // Fixed particles: DFSPH integrator skips them, so write xTilde back to model position // so they advance by the externally-prescribed velocity. const int nFixed = (int)obj->m_nFixed; const int firstFixed = numParticles - nFixed; for (int i = firstFixed; i < numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; m_model->getPosition(particleIndex) = obj->m_xTilde[i]; } // x0 = x_tilde #pragma omp parallel for schedule(static) for (int i = 0; i < numParticles; i++) xk[i] = obj->m_xTilde[i]; // Iterative solve const int maxIter = m_maxIter; const int nFree = (int)dx.size(); // Renew L-BFGS buffers if window size changed at runtime obj->m_lbfgs_count = 0; if ((int)obj->m_lbfgs_s.size() != m_lbfgsWindowSize) { obj->m_lbfgs_s.resize(m_lbfgsWindowSize); obj->m_lbfgs_y.resize(m_lbfgsWindowSize); for (int w = 0; w < m_lbfgsWindowSize; w++) { obj->m_lbfgs_s[w].resize(nFree, Vector3r::Zero()); obj->m_lbfgs_y[w].resize(nFree, Vector3r::Zero()); } obj->m_lbfgs_rho.resize(m_lbfgsWindowSize, 0); obj->m_lbfgs_alpha.resize(m_lbfgsWindowSize, 0); } int totalLSIter = 0; int totalCGIter = 0; int iter = 0; for (; iter < maxIter; iter++) { // Compute search direction (stored in dx) and gradient (stored in m_gradient) Real energy; if (m_solverType == 0) // Newton { int cgIter; energy = newtonSolve(obj, cgIter); totalCGIter += cgIter; } else // L-BFGS { energy = lbfgsSolve(obj); } // Convergence check on ||dx||_inf before line search Real dxNorm = 0; #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) { const Real localMax = dx[i].cwiseAbs().maxCoeff(); #pragma omp critical if (localMax > dxNorm) dxNorm = localMax; } if (dxNorm < m_maxError) break; // Line search Real alpha = static_cast(1.0); if (m_useLineSearch) { int lsIter; alpha = lineSearch(obj, energy, lsIter); totalLSIter += lsIter; if (alpha == static_cast(0)) { LOG_INFO << "Elasticity_Kee2023: line search failed at iter " << iter; break; } } // xk += alpha * dx #pragma omp parallel for schedule(static) for (int i = 0; i < nFree; i++) xk[i] += alpha * dx[i]; } if (iter == maxIter) LOG_WARN << "Elasticity_Kee2023: not converged after " << maxIter << " iterations"; updateVelocity(obj, fdt); double elapsedMs = STOP_TIMING if (m_solverType == 0) // Newton LOG_INFO << "STATS frame obj=" << objIndex << " solverIter=" << iter + 1 << " cgIter=" << totalCGIter << " avgCG=" << (iter > 0 ? totalCGIter / (iter + 1) : 0) << " lsIter=" << totalLSIter << " elapsed_ms=" << elapsedMs; else // L-BFGS LOG_INFO << "STATS frame obj=" << objIndex << " solverIter=" << iter + 1 << " lsIter=" << totalLSIter << " elapsed_ms=" << elapsedMs; } STOP_TIMING_AVG } #endif Real Elasticity_Kee2023::computePsi(const Matrix3r& F, const Matrix3r& R) const { const Real J = F.determinant(); if (m_materialType == ENUM_MATERIAL_STABLE_NEOHOOKEAN) { // Smith et al. 2018 Eq. 14 const Real I_C = (F.transpose() * F).trace(); const Real alpha = 1 + m_mu / m_lambda - m_mu / (4 * m_lambda); const Real Jma = J - alpha; return static_cast(0.5) * m_mu * (I_C - static_cast(3.0)) + static_cast(0.5) * m_lambda * Jma * Jma - static_cast(0.5) * m_mu * std::log(I_C + 1); } // Co-rotated: Psi = μ||F - R||² + (λ/2)(tr(R^T F) - 3)² const Real trRtF = (R.transpose() * F).trace(); return m_mu * (F - R).squaredNorm() + static_cast(0.5) * m_lambda * (trRtF - static_cast(3.0)) * (trRtF - static_cast(3.0)); } Matrix3r Elasticity_Kee2023::computeP(const Matrix3r& F, const Matrix3r& R) const { const Real J = F.determinant(); Matrix3r cofF; cofF.col(0) = F.col(1).cross(F.col(2)); cofF.col(1) = F.col(2).cross(F.col(0)); cofF.col(2) = F.col(0).cross(F.col(1)); if (m_materialType == ENUM_MATERIAL_STABLE_NEOHOOKEAN) { // Smith et al. 2018 Eq. 18 const Real I_C = (F.transpose() * F).trace(); const Real alpha = 1 + m_mu / m_lambda - m_mu / (4 * m_lambda); return m_mu * (1 - 1 / (I_C + 1)) * F + m_lambda * (J - alpha) * cofF; } // Co-rotated: P = 2μ(F - R) + λ(tr(R^T F) - 3) R const Real trRtF = (R.transpose() * F).trace(); return static_cast(2.0) * m_mu * (F - R) + m_lambda * (trRtF - static_cast(3.0)) * R; }