Program Listing for File Elasticity_Kee2023.cpp
↰ Return to documentation for file (SPlisHSPlasH/Elasticity/Elasticity_Kee2023.cpp)
#include "Elasticity_Kee2023.h"
#include "SPlisHSPlasH/Simulation.h"
#include "SPlisHSPlasH/Utilities/MathFunctions.h"
#include "SPlisHSPlasH/TimeManager.h"
#include "Utilities/Timing.h"
#include "Utilities/Counting.h"
#include <extern/md5/md5.h>
#include "Utilities/BinaryFileReaderWriter.h"
#include "Utilities/StringTools.h"
#include <Utilities/FileSystem.h>
#include <array>
#include <chrono>
using namespace SPH;
using namespace GenParam;
std::string Elasticity_Kee2023::METHOD_NAME = "Kee et al. 2023";
int Elasticity_Kee2023::YOUNGS_MODULUS = -1;
int Elasticity_Kee2023::POISSON_RATIO = -1;
int Elasticity_Kee2023::FIXED_BOX_MIN = -1;
int Elasticity_Kee2023::FIXED_BOX_MAX = -1;
int Elasticity_Kee2023::FIXED_BOX2_MIN = -1;
int Elasticity_Kee2023::FIXED_BOX2_MAX = -1;
int Elasticity_Kee2023::ALPHA = -1;
int Elasticity_Kee2023::MAX_NEIGHBORS = -1;
int Elasticity_Kee2023::SOLVER_TYPE = -1;
int Elasticity_Kee2023::LBFGS_WINDOW_SIZE = -1;
int Elasticity_Kee2023::MATERIAL_TYPE = -1;
int Elasticity_Kee2023::MAX_ITER = -1;
int Elasticity_Kee2023::MAX_ERROR = -1;
int Elasticity_Kee2023::MAX_ITER_CG = -1;
int Elasticity_Kee2023::TOL_CG = -1;
int Elasticity_Kee2023::MAX_LS_ITER = -1;
int Elasticity_Kee2023::LS_ARMIJO_PARAM = -1;
int Elasticity_Kee2023::LS_BETA = -1;
int Elasticity_Kee2023::USE_LINE_SEARCH = -1;
int Elasticity_Kee2023::ENUM_SOLVER_NEWTON = -1;
int Elasticity_Kee2023::ENUM_SOLVER_LBFGS = -1;
int Elasticity_Kee2023::ENUM_MATERIAL_STABLE_NEOHOOKEAN = -1;
int Elasticity_Kee2023::ENUM_MATERIAL_COROTATED = -1;
Elasticity_Kee2023::Elasticity_Kee2023(FluidModel *model) :
NonPressureForceBase(model)
{
const unsigned int numParticles = model->numActiveParticles();
m_restVolumes.resize(numParticles);
m_current_to_initial_index.resize(numParticles);
m_initial_to_current_index.resize(numParticles);
m_initialNeighbors.resize(numParticles);
m_rotations.resize(numParticles, Matrix3r::Identity());
m_stress.resize(numParticles);
m_fixedGroupId.resize(numParticles, 0);
m_L.resize(numParticles); // kernel gradient correction matrix L
m_F.resize(numParticles); // deformation gradient
m_PL.resize(numParticles); // stores the rotation matrix times the matrix L
m_youngsModulus = static_cast<Real>(5000000.0);
m_poissonRatio = static_cast<Real>(0.45);
m_alpha = 0.0;
m_maxNeighbors = -1;
m_fixedBoxMin.setZero();
m_fixedBoxMax.setZero();
m_fixedBox2Min.setZero();
m_fixedBox2Max.setZero();
m_solverType = 1;
m_lbfgsWindowSize = 5;
m_materialType = 1; // Co-rotated
m_maxIter = 100;
m_maxError = static_cast<Real>(1e-6);
m_maxIterCG = 100;
m_tolCG = static_cast<Real>(1e-4);
m_maxLSIter = 20;
m_lsArmijoParam = static_cast<Real>(1e-4);
m_lsBeta = static_cast<Real>(0.5);
m_useLineSearch = true;
model->addField({ "rest volume", METHOD_NAME, FieldType::Scalar, [&](const unsigned int i) -> Real* { return &m_restVolumes[i]; }, true });
model->addField({ "rotation", METHOD_NAME, FieldType::Matrix3, [&](const unsigned int i) -> Real* { return &m_rotations[i](0,0); } });
model->addField({ "stress", METHOD_NAME, FieldType::Scalar, [&](const unsigned int i) -> Real* { return &m_stress[i]; } });
model->addField({ "deformation gradient", METHOD_NAME, FieldType::Matrix3, [&](const unsigned int i) -> Real* { return &m_F[i](0,0); } });
model->addField({ "correction matrix", METHOD_NAME, FieldType::Matrix3, [&](const unsigned int i) -> Real* { return &m_L[i](0,0); } });
}
Elasticity_Kee2023::~Elasticity_Kee2023(void)
{
m_model->removeFieldByName("rest volume");
m_model->removeFieldByName("rotation");
m_model->removeFieldByName("stress");
m_model->removeFieldByName("deformation gradient");
m_model->removeFieldByName("correction matrix");
for (auto objIndex = 0; objIndex < m_objects.size(); objIndex++)
{
delete m_objects[objIndex];
}
m_objects.clear();
}
void Elasticity_Kee2023::deferredInit()
{
initValues();
}
void Elasticity_Kee2023::initParameters()
{
NonPressureForceBase::initParameters();
ParameterBase::GetFunc<Real> getFctYM = [&]()-> Real { return m_youngsModulus; };
ParameterBase::SetFunc<Real> setFctYM = [&](Real val)
{
m_youngsModulus = val;
m_mu = m_youngsModulus / (static_cast<Real>(2.0) * (static_cast<Real>(1.0) + m_poissonRatio));
m_lambda = m_youngsModulus * m_poissonRatio / ((static_cast<Real>(1.0) + m_poissonRatio) * (static_cast<Real>(1.0) - static_cast<Real>(2.0) * m_poissonRatio));
if (Simulation::getCurrent()->isSimulationInitialized()) // if Young's modulus has changed, recompute the factorization
Simulation::getCurrent()->reset();
};
YOUNGS_MODULUS = createNumericParameter("youngsModulus", "Young`s modulus", getFctYM, setFctYM);
setGroup(YOUNGS_MODULUS, "Fluid Model|Elasticity");
setDescription(YOUNGS_MODULUS, "Stiffness of the elastic material");
RealParameter* rparam = static_cast<RealParameter*>(getParameter(YOUNGS_MODULUS));
rparam->setMinValue(0.0);
ParameterBase::GetFunc<Real> getFctPR = [&]()-> Real { return m_poissonRatio; };
ParameterBase::SetFunc<Real> setFctPR = [&](Real val)
{
m_poissonRatio = val;
m_mu = m_youngsModulus / (static_cast<Real>(2.0) * (static_cast<Real>(1.0) + m_poissonRatio));
m_lambda = m_youngsModulus * m_poissonRatio / ((static_cast<Real>(1.0) + m_poissonRatio) * (static_cast<Real>(1.0) - static_cast<Real>(2.0) * m_poissonRatio));
if (Simulation::getCurrent()->isSimulationInitialized()) // if Poisson ration has changed, recompute the factorization
Simulation::getCurrent()->reset();
};
POISSON_RATIO = createNumericParameter("poissonsRatio", "Poisson`s ratio", getFctPR, setFctPR);
setGroup(POISSON_RATIO, "Fluid Model|Elasticity");
setDescription(POISSON_RATIO, "Ratio of transversal expansion and axial compression");
rparam = static_cast<RealParameter*>(getParameter(POISSON_RATIO));
rparam->setMinValue(static_cast<Real>(-1.0 + 1e-4));
rparam->setMaxValue(static_cast<Real>(0.5 - 1e-4));
ParameterBase::GetFunc<Real> getFctAlpha = [&]()-> Real { return m_alpha; };
ParameterBase::SetFunc<Real> setFctAlpha = [&](Real val)
{
m_alpha = val;
if (Simulation::getCurrent()->isSimulationInitialized()) // if value has changed, recompute the factorization
Simulation::getCurrent()->reset();
};
ALPHA = createNumericParameter("alpha", "Zero-energy modes suppression", getFctAlpha, setFctAlpha);
setGroup(ALPHA, "Fluid Model|Elasticity");
setDescription(ALPHA, "Coefficent for zero-energy modes suppression method");
rparam = static_cast<RealParameter*>(getParameter(ALPHA));
rparam->setMinValue(0.0);
ParameterBase::GetFunc<int> getFct5 = [&]()-> int { return m_maxNeighbors; };
ParameterBase::SetFunc<int> setFct5 = [&](int val)
{
m_maxNeighbors = val;
if (Simulation::getCurrent()->isSimulationInitialized()) // if value has changed, recompute the factorization
Simulation::getCurrent()->reset();
};
MAX_NEIGHBORS = createNumericParameter("maxNeighbors", "Max. neighbors", getFct5, setFct5);
setGroup(MAX_NEIGHBORS, "Fluid Model|Elasticity");
setDescription(MAX_NEIGHBORS, "Maximum number of neighbors that are considered.");
ParameterBase::GetVecFunc<Real> getFctFMin = [&]()-> Real* { return m_fixedBoxMin.data(); };
ParameterBase::SetVecFunc<Real> setFctFMin = [&](Real* val)
{
m_fixedBoxMin = Vector3r(val[0], val[1], val[2]);
determineFixedParticles();
};
FIXED_BOX_MIN = createVectorParameter("fixedBoxMin", "Fixed box min", 3u, getFctFMin, setFctFMin);
setGroup(FIXED_BOX_MIN, "Fluid Model|Elasticity");
setDescription(FIXED_BOX_MIN, "Minimum point of box of which contains the fixed particles.");
getParameter(FIXED_BOX_MIN)->setReadOnly(true);
ParameterBase::GetVecFunc<Real> getFctFMax = [&]()-> Real* { return m_fixedBoxMax.data(); };
ParameterBase::SetVecFunc<Real> setFctFMax = [&](Real* val)
{
m_fixedBoxMax = Vector3r(val[0], val[1], val[2]);
determineFixedParticles();
};
FIXED_BOX_MAX = createVectorParameter("fixedBoxMax", "Fixed box max", 3u, getFctFMax, setFctFMax);
setGroup(FIXED_BOX_MAX, "Fluid Model|Elasticity");
setDescription(FIXED_BOX_MAX, "Maximum point of box of which contains the fixed particles.");
getParameter(FIXED_BOX_MAX)->setReadOnly(true);
ParameterBase::GetVecFunc<Real> getFctF2Min = [&]()-> Real* { return m_fixedBox2Min.data(); };
ParameterBase::SetVecFunc<Real> setFctF2Min = [&](Real* val)
{
m_fixedBox2Min = Vector3r(val[0], val[1], val[2]);
determineFixedParticles();
};
FIXED_BOX2_MIN = createVectorParameter("fixedBox2Min", "Fixed box 2 min", 3u, getFctF2Min, setFctF2Min);
setGroup(FIXED_BOX2_MIN, "Fluid Model|Elasticity");
setDescription(FIXED_BOX2_MIN, "Minimum point of second box containing fixed particles.");
getParameter(FIXED_BOX2_MIN)->setReadOnly(true);
ParameterBase::GetVecFunc<Real> getFctF2Max = [&]()-> Real* { return m_fixedBox2Max.data(); };
ParameterBase::SetVecFunc<Real> setFctF2Max = [&](Real* val)
{
m_fixedBox2Max = Vector3r(val[0], val[1], val[2]);
determineFixedParticles();
};
FIXED_BOX2_MAX = createVectorParameter("fixedBox2Max", "Fixed box 2 max", 3u, getFctF2Max, setFctF2Max);
setGroup(FIXED_BOX2_MAX, "Fluid Model|Elasticity");
setDescription(FIXED_BOX2_MAX, "Maximum point of second box containing fixed particles.");
getParameter(FIXED_BOX2_MAX)->setReadOnly(true);
SOLVER_TYPE = createEnumParameter("solverType", "Solver type", &m_solverType);
setGroup(SOLVER_TYPE, "Fluid Model|Elasticity");
setDescription(SOLVER_TYPE, "Solver used for the elasticity computation.");
EnumParameter* enumParam = static_cast<EnumParameter*>(getParameter(SOLVER_TYPE));
enumParam->addEnumValue("Newton", ENUM_SOLVER_NEWTON);
enumParam->addEnumValue("L-BFGS", ENUM_SOLVER_LBFGS);
LBFGS_WINDOW_SIZE = createNumericParameter("lbfgsWindowSize", "L-BFGS window size", &m_lbfgsWindowSize);
setGroup(LBFGS_WINDOW_SIZE, "Fluid Model|Elasticity");
setDescription(LBFGS_WINDOW_SIZE, "Number of past iterations stored for L-BFGS approximation (only for L-BFGS).");
static_cast<NumericParameter<int>*>(getParameter(LBFGS_WINDOW_SIZE))->setMinValue(0);
MAX_ITER = createNumericParameter("maxIterations", "Max. iterations", &m_maxIter);
setGroup(MAX_ITER, "Fluid Model|Elasticity");
setDescription(MAX_ITER, "Maximum number of solver iterations per time step.");
static_cast<NumericParameter<int>*>(getParameter(MAX_ITER))->setMinValue(1);
MAX_ERROR = createNumericParameter("maxError", "Max. error", &m_maxError);
setGroup(MAX_ERROR, "Fluid Model|Elasticity");
setDescription(MAX_ERROR, "Convergence threshold on infinity norm of position update.");
rparam = static_cast<RealParameter*>(getParameter(MAX_ERROR));
rparam->setMinValue(static_cast<Real>(1e-15));
MAX_ITER_CG = createNumericParameter("maxIterationsCG", "Max. CG iterations", &m_maxIterCG);
setGroup(MAX_ITER_CG, "Fluid Model|Elasticity");
setDescription(MAX_ITER_CG, "Maximum number of CG iterations for Newton linear solve.");
static_cast<NumericParameter<int>*>(getParameter(MAX_ITER_CG))->setMinValue(1);
TOL_CG = createNumericParameter("tolCG", "CG tolerance", &m_tolCG);
setGroup(TOL_CG, "Fluid Model|Elasticity");
setDescription(TOL_CG, "Convergence tolerance for CG solver.");
rparam = static_cast<RealParameter*>(getParameter(TOL_CG));
rparam->setMinValue(static_cast<Real>(1e-15));
MAX_LS_ITER = createNumericParameter("maxLSIterations", "Max. LS iterations", &m_maxLSIter);
setGroup(MAX_LS_ITER, "Fluid Model|Elasticity");
setDescription(MAX_LS_ITER, "Maximum number of backtracking line search iterations.");
static_cast<NumericParameter<int>*>(getParameter(MAX_LS_ITER))->setMinValue(1);
LS_ARMIJO_PARAM = createNumericParameter("lsArmijoParam", "LS Armijo c1", &m_lsArmijoParam);
setGroup(LS_ARMIJO_PARAM, "Fluid Model|Elasticity");
setDescription(LS_ARMIJO_PARAM, "Armijo sufficient decrease parameter for line search.");
rparam = static_cast<RealParameter*>(getParameter(LS_ARMIJO_PARAM));
rparam->setMinValue(static_cast<Real>(1e-10));
rparam->setMaxValue(static_cast<Real>(0.5));
LS_BETA = createNumericParameter("lsBeta", "LS backtracking factor", &m_lsBeta);
setGroup(LS_BETA, "Fluid Model|Elasticity");
setDescription(LS_BETA, "Step size reduction factor per line search iteration.");
rparam = static_cast<RealParameter*>(getParameter(LS_BETA));
rparam->setMinValue(static_cast<Real>(0.01));
rparam->setMaxValue(static_cast<Real>(0.99));
USE_LINE_SEARCH = createBoolParameter("useLineSearch", "Use line search", &m_useLineSearch);
setGroup(USE_LINE_SEARCH, "Fluid Model|Elasticity");
setDescription(USE_LINE_SEARCH, "Enable backtracking Armijo line search.");
MATERIAL_TYPE = createEnumParameter("materialType", "Material type", &m_materialType);
setGroup(MATERIAL_TYPE, "Fluid Model|Elasticity");
setDescription(MATERIAL_TYPE, "Constitutive model for the elastic material.");
enumParam = static_cast<EnumParameter*>(getParameter(MATERIAL_TYPE));
enumParam->addEnumValue("Stable Neo-Hookean", ENUM_MATERIAL_STABLE_NEOHOOKEAN);
enumParam->addEnumValue("Co-rotated", ENUM_MATERIAL_COROTATED);
}
void Elasticity_Kee2023::determineFixedParticles()
{
const unsigned int numParticles = m_model->numActiveParticles();
auto inBox = [](const Vector3r& x, const Vector3r& bmin, const Vector3r& bmax) -> bool
{
return (x[0] > bmin[0]) && (x[1] > bmin[1]) && (x[2] > bmin[2]) &&
(x[0] < bmax[0]) && (x[1] < bmax[1]) && (x[2] < bmax[2]);
};
const bool hasBox1 = !m_fixedBoxMin.isZero() || !m_fixedBoxMax.isZero();
const bool hasBox2 = !m_fixedBox2Min.isZero() || !m_fixedBox2Max.isZero();
if (hasBox1 || hasBox2)
{
for (int i = 0; i < (int)numParticles; i++)
{
const Vector3r& x = m_model->getPosition0(i);
if (hasBox1 && inBox(x, m_fixedBoxMin, m_fixedBoxMax))
{
m_model->setParticleState(i, ParticleState::Fixed);
m_fixedGroupId[i] = 1;
}
else if (hasBox2 && inBox(x, m_fixedBox2Min, m_fixedBox2Max))
{
m_model->setParticleState(i, ParticleState::Fixed);
m_fixedGroupId[i] = 2;
}
}
}
}
std::string Elasticity_Kee2023::computeMD5(const unsigned int objIndex)
{
ElasticObject* obj = m_objects[objIndex];
auto& group = obj->m_particleIndices;
auto numParticles = group.size();
std::vector<unsigned int> tempN;
tempN.resize(obj->m_particleIndices.size() * 2);
for (size_t i = 0; i < numParticles; i++)
{
const unsigned int particleIndex = group[i];
const size_t numNeighbors = m_initialNeighbors[particleIndex].size();
tempN[2 * i] = static_cast<unsigned int>(numNeighbors);
tempN[2 * i + 1] = 0;
for (auto j = 0; j < numNeighbors; j++)
tempN[2 * i + 1] += m_initialNeighbors[particleIndex][j] - group[0];
}
// compute MD5 checksum for all particle positions, this is used for the cache file
MD5 context((unsigned char*)&tempN[0], static_cast<unsigned int>(2 * numParticles * sizeof(unsigned int)));
char* md5hex = context.hex_digest();
tempN.clear();
return std::string(md5hex);
}
void Elasticity_Kee2023::initValues()
{
Simulation *sim = Simulation::getCurrent();
sim->getNeighborhoodSearch()->find_neighbors();
FluidModel *model = m_model;
const unsigned int numParticles = model->numActiveParticles();
const unsigned int fluidModelIndex = model->getPointSetIndex();
m_totalNeighbors = 0u;
// Store the neighbors in the reference configurations and
// compute the volume of each particle in rest state
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
m_current_to_initial_index[i] = i;
m_initial_to_current_index[i] = i;
// reset particle state
if (m_model->getParticleState(i) == ParticleState::Fixed)
m_model->setParticleState(i, ParticleState::Active);
// only neighbors in same phase will influence elasticity
unsigned int numNeighbors = sim->numberOfNeighbors(fluidModelIndex, fluidModelIndex, i);
m_initialNeighbors[i].resize(numNeighbors);
for (unsigned int j = 0; j < numNeighbors; j++)
m_initialNeighbors[i][j] = sim->getNeighbor(fluidModelIndex, fluidModelIndex, i, j);
// if maxNeighbors is set, then sort all neighbors wrt. to their distance to xi
// and only take the maxNeighbors next ones.
if (m_maxNeighbors > 0)
{
struct Comparator {
Comparator(const Vector3r& xi, Vector3r* x) : m_xi(xi), m_x(x) {};
bool operator()(unsigned int a, unsigned int b)
{
return (m_x[a] - m_xi).squaredNorm() < (m_x[b] - m_xi).squaredNorm();
}
Vector3r m_xi;
Vector3r* m_x;
};
// sort the neighbors according to their distance
std::sort(m_initialNeighbors[i].begin(), m_initialNeighbors[i].end(), Comparator(model->getPosition0(i), &model->getPosition0(0)));
// take only the next maxNeighbors
if (m_initialNeighbors[i].size() > m_maxNeighbors)
{
numNeighbors = m_maxNeighbors;
m_initialNeighbors[i].resize(m_maxNeighbors);
}
}
m_totalNeighbors += numNeighbors;
m_rotations[i].setIdentity();
m_stress[i] = 0.0;
m_F[i].setIdentity();
m_PL[i].setIdentity();
}
}
// Symmetrize neighbor lists: if j∈N(i) but i∉N(j), add i to N(j)
if (m_maxNeighbors > 0)
{
for (unsigned int i = 0; i < numParticles; i++)
{
for (size_t jn = 0; jn < m_initialNeighbors[i].size(); jn++)
{
unsigned int j = m_initialNeighbors[i][jn];
bool found = false;
for (size_t kn = 0; kn < m_initialNeighbors[j].size(); kn++)
{
if (m_initialNeighbors[j][kn] == i) { found = true; break; }
}
if (!found)
m_initialNeighbors[j].push_back(i);
}
}
}
// Check neighbor list symmetry
{
unsigned int maxActualNeighbors = 0;
int asymmetricPairs = 0;
for (unsigned int i = 0; i < numParticles; i++)
{
if (m_initialNeighbors[i].size() > maxActualNeighbors)
maxActualNeighbors = (unsigned int)m_initialNeighbors[i].size();
for (size_t jn = 0; jn < m_initialNeighbors[i].size(); jn++)
{
unsigned int j = m_initialNeighbors[i][jn];
bool found = false;
for (size_t kn = 0; kn < m_initialNeighbors[j].size(); kn++)
{
if (m_initialNeighbors[j][kn] == i) { found = true; break; }
}
if (!found) asymmetricPairs++;
}
}
LOG_INFO << "Neighbor stats: maxNeighbors=" << maxActualNeighbors
<< ", asymmetricPairs=" << asymmetricPairs;
}
// Compute rest volumes using final (symmetrized) neighbor lists
for (unsigned int i = 0; i < numParticles; i++)
{
Real density = model->getMass(i) * sim->W_zero();
const Vector3r &xi0 = model->getPosition0(i);
for (size_t j = 0; j < m_initialNeighbors[i].size(); j++)
{
const unsigned int neighborIndex0 = m_initialNeighbors[i][j];
const Vector3r& xj0 = model->getPosition0(neighborIndex0);
density += model->getMass(neighborIndex0) * sim->W(xi0 - xj0);
}
m_restVolumes[i] = model->getMass(i) / density;
}
// mark all particles in the bounding box as fixed
determineFixedParticles();
// find separate objects
START_TIMING("findObjects")
findObjects();
STOP_TIMING_AVG;
// if we find the same object, copy the neighborhood info
size_t numObjects = m_objects.size();
auto& fluidInfos = sim->getFluidInfos();
for (auto objIndex = 1; objIndex < numObjects; objIndex++)
{
bool foundSameObj = false;
int objIndex2;
for (objIndex2 = 0; objIndex2 < objIndex; objIndex2++)
{
if (fluidInfos[objIndex].hasSameParticleSampling(fluidInfos[objIndex2]))
{
foundSameObj = true;
break;
}
}
if (foundSameObj)
{
ElasticObject* obj = m_objects[objIndex];
ElasticObject* obj0 = m_objects[objIndex2];
const std::vector<unsigned int>& group = obj->m_particleIndices;
const std::vector<unsigned int>& group0 = obj0->m_particleIndices;
int numParticles = (int)group.size();
int offset = group[0];
int offset0 = group0[0];
for (int i = 0; i < (int)numParticles; i++)
{
int particleIndex = group[i];
int particleIndex0 = group0[i];
const unsigned int i0 = m_current_to_initial_index[particleIndex];
const unsigned int i00 = m_current_to_initial_index[particleIndex0];
const size_t numNeighbors = m_initialNeighbors[i00].size();
m_initialNeighbors[i0].resize(numNeighbors);
for (int j = 0; j < numNeighbors; j++)
{
m_initialNeighbors[i0][j] = m_initialNeighbors[i00][j] - offset0 + offset;
}
}
}
}
// compute kernel gradient correction matrix
START_TIMING("computeMatrixL")
computeMatrixL();
STOP_TIMING_AVG;
// init factorization
START_TIMING("initSystem")
initSystem();
STOP_TIMING_AVG;
}
void Elasticity_Kee2023::findObjects()
{
Simulation *sim = Simulation::getCurrent();
const unsigned int numParticles = m_model->numActiveParticles();
const unsigned int fluidModelIndex = m_model->getPointSetIndex();
FluidModel *model = m_model;
m_objects.clear();
std::map<unsigned int, unsigned int> obj2group;
for (unsigned int i = 0; i < numParticles; i++)
{
const unsigned int objId = m_model->getObjectId(i);
// search for object id in map
if (obj2group.find(objId) == obj2group.end())
{
// if id was not found, then we have a new object
m_objects.push_back(new ElasticObject());
const unsigned int groupIndex = (unsigned int)(m_objects.size()-1);
obj2group[objId] = groupIndex;
const unsigned int i0 = m_current_to_initial_index[i];
m_objects[groupIndex]->m_particleIndices.push_back(i0);
}
else
{
// object already exists
const unsigned int groupIndex = obj2group[objId];
const unsigned int i0 = m_current_to_initial_index[i];
m_objects[groupIndex]->m_particleIndices.push_back(i0);
}
}
// For each object sort the particles so that all fixed particles are at the end of the list.
// This is needed for the factorization to exclude fixed particles.
for (size_t groupIndex = 0; groupIndex < m_objects.size(); groupIndex++)
{
struct Comparator {
Comparator(Elasticity_Kee2023* _this) : m_this(_this) {};
bool operator()(unsigned int a, unsigned int b)
{
if ((m_this->m_model->getParticleState(a) != ParticleState::Active) && (m_this->m_model->getParticleState(b) == ParticleState::Active))
return false;
else if ((m_this->m_model->getParticleState(a) == ParticleState::Active) && (m_this->m_model->getParticleState(b) != ParticleState::Active))
return true;
else if ((m_this->m_model->getParticleState(a) != ParticleState::Active) && (m_this->m_model->getParticleState(b) != ParticleState::Active))
return a < b;
else
return a < b;
}
Elasticity_Kee2023* m_this;
};
std::sort(m_objects[groupIndex]->m_particleIndices.begin(), m_objects[groupIndex]->m_particleIndices.end(), Comparator(this));
m_objects[groupIndex]->m_nFixed = 0;
for (size_t i = 0; i < m_objects[groupIndex]->m_particleIndices.size(); i++)
{
if (m_model->getParticleState(m_objects[groupIndex]->m_particleIndices[i]) != ParticleState::Active)
m_objects[groupIndex]->m_nFixed++;
}
LOG_INFO << "Object " << groupIndex << " - fixed particles: " << m_objects[groupIndex]->m_nFixed;
}
}
void Elasticity_Kee2023::initSystem()
{
Simulation *sim = Simulation::getCurrent();
const unsigned int fluidModelIndex = m_model->getPointSetIndex();
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
// Compute Lamé parameters
m_mu = m_youngsModulus / (static_cast<Real>(2.0) * (static_cast<Real>(1.0) + m_poissonRatio));
m_lambda = m_youngsModulus * m_poissonRatio / ((static_cast<Real>(1.0) + m_poissonRatio) * (static_cast<Real>(1.0) - static_cast<Real>(2.0) * m_poissonRatio));
FluidModel *model = m_model;
size_t numObjects = m_objects.size();
auto& fluidInfos = sim->getFluidInfos();
for (auto objIndex = 0; objIndex < numObjects; objIndex++)
{
ElasticObject* obj = m_objects[objIndex];
// compute MD5 check sum
std::string md5 = computeMD5(objIndex);
// check if object with same md5 already exists
bool foundFactorization = false;
for (size_t i = 0; i < objIndex; i++)
{
// reuse the factorization for all objects with the same particle sampling to reduce the memory consumption
if (fluidInfos[objIndex].hasSameParticleSampling(fluidInfos[i]))
{
m_objects[objIndex]->m_factorization = m_objects[i]->m_factorization;
foundFactorization = true;
LOG_INFO << "Object " << objIndex << " is using the factorization of object " << i;
break;
}
}
// if no factorization was found, create a new one
if (obj->m_factorization == nullptr)
obj->m_factorization = std::make_shared<Factorization>();
// generate file name for cache file
std::string baseName = Utilities::FileSystem::getFileName(fluidInfos[objIndex].samplesFile);
std::string ext = Utilities::FileSystem::getFileExt(fluidInfos[objIndex].samplesFile);
std::string cacheFileName = sim->getCachePath() + "/" + baseName + "_" + ext +
"_" + std::to_string(fluidInfos[objIndex].mode) +
"_" + md5 + "_" + Utilities::StringTools::real2String(dt) +
"_" + Utilities::StringTools::real2String(m_mu) +
"_" + Utilities::StringTools::real2String(m_alpha) +
".bin";
// Fluid block
if (fluidInfos[objIndex].type == 0)
{
Vector3r diag = (fluidInfos[objIndex].box.max() - fluidInfos[objIndex].box.min());
cacheFileName = sim->getCachePath() + "/Block_" +
Utilities::StringTools::real2String(diag.x()) + "_" + Utilities::StringTools::real2String(diag.y()) + "_" + Utilities::StringTools::real2String(diag.z()) +
"_" + std::to_string(fluidInfos[objIndex].mode) +
"_" + md5 + "_" + Utilities::StringTools::real2String(dt) +
"_" + Utilities::StringTools::real2String(m_mu) +
"_" + Utilities::StringTools::real2String(m_alpha) +
".bin";
}
// check if cache file exists
const bool foundCacheFile = Utilities::FileSystem::fileExists(cacheFileName);
if (sim->getUseCache() && foundCacheFile)
{
// if factorization cannot be reused from another object and a cache file was found, load the cache file
if (!foundFactorization)
{
LOG_INFO << "Read cached factorization: " << cacheFileName;
BinaryFileReader binReader;
binReader.openFile(cacheFileName);
binReader.readSparseMatrix(obj->m_factorization->m_D);
binReader.readSparseMatrix(obj->m_factorization->m_DT_K);
binReader.read(obj->m_factorization->m_dt);
binReader.read(obj->m_factorization->m_mu);
binReader.readSparseMatrix(obj->m_factorization->m_matHTH);
#ifdef USE_AVX
delete obj->m_factorization->m_cholesky;
obj->m_factorization->m_cholesky = new CholeskyAVXSolver();
obj->m_factorization->m_cholesky->load(binReader);
#else
binReader.readSparseMatrix(obj->m_factorization->m_matL);
binReader.readSparseMatrix(obj->m_factorization->m_matLT);
binReader.readMatrixX(obj->m_factorization->m_permInd);
binReader.readMatrixX(obj->m_factorization->m_permInvInd);
#endif
binReader.closeFile();
}
// init vectors
int numParticles = (int)obj->m_particleIndices.size();
#ifdef USE_AVX
obj->m_dx_avx.resize(numParticles - obj->m_nFixed);
obj->m_f_avx.resize(3 * numParticles);
obj->m_xk_avx.resize(numParticles);
obj->m_xTilde_avx.resize(numParticles);
#else
obj->m_dx.resize(numParticles - obj->m_nFixed);
obj->m_f.resize(3 * numParticles);
obj->m_xk.resize(numParticles);
obj->m_xTilde.resize(numParticles);
#endif
int nFree = numParticles - obj->m_nFixed;
#ifdef USE_AVX
obj->m_gradient_avx.resize(numParticles, Scalarf8(0.0f));
// L-BFGS buffers (Scalarf8 domain)
obj->m_lbfgs_s_avx.resize(m_lbfgsWindowSize);
obj->m_lbfgs_y_avx.resize(m_lbfgsWindowSize);
for (int w = 0; w < m_lbfgsWindowSize; w++)
{
obj->m_lbfgs_s_avx[w].resize(nFree, Scalarf8(0.0f));
obj->m_lbfgs_y_avx[w].resize(nFree, Scalarf8(0.0f));
}
obj->m_lbfgs_rho.resize(m_lbfgsWindowSize, 0);
obj->m_lbfgs_alpha.resize(m_lbfgsWindowSize, 0);
obj->m_lbfgs_last_sol_avx.resize(nFree, Scalarf8(0.0f));
obj->m_lbfgs_q_avx.resize(nFree, Scalarf8(0.0f));
obj->m_lbfgs_count = 0;
// Newton CG workspace (Scalarf8, coord-packed)
obj->m_pcg_r_avx.resize(nFree, Scalarf8(0.0f));
obj->m_pcg_p_avx.resize(nFree, Scalarf8(0.0f));
obj->m_pcg_Ap_avx.resize(nFree, Scalarf8(0.0f));
obj->m_pcg_z_avx.resize(nFree, Scalarf8(0.0f));
#else
obj->m_dx_perm.resize(numParticles - obj->m_nFixed);
obj->m_gradient.resize(numParticles, Vector3r::Zero());
// L-BFGS buffers
obj->m_lbfgs_s.resize(m_lbfgsWindowSize);
obj->m_lbfgs_y.resize(m_lbfgsWindowSize);
for (int w = 0; w < m_lbfgsWindowSize; w++)
{
obj->m_lbfgs_s[w].resize(nFree, Vector3r::Zero());
obj->m_lbfgs_y[w].resize(nFree, Vector3r::Zero());
}
obj->m_lbfgs_rho.resize(m_lbfgsWindowSize, 0);
obj->m_lbfgs_alpha.resize(m_lbfgsWindowSize, 0);
obj->m_lbfgs_last_sol.resize(nFree, Vector3r::Zero());
obj->m_lbfgs_q.resize(nFree, Vector3r::Zero());
obj->m_lbfgs_count = 0;
// Newton buffers (scalar PCG workspace)
obj->m_pcg_r.resize(nFree, Vector3r::Zero());
obj->m_pcg_p.resize(nFree, Vector3r::Zero());
obj->m_pcg_Ap.resize(nFree, Vector3r::Zero());
obj->m_pcg_z.resize(nFree, Vector3r::Zero());
#endif
// Shared Newton buffers (both builds)
obj->m_hessian9x9.resize(numParticles);
obj->m_pcg_precond.resize(nFree, Matrix3r::Identity());
}
else // no cache found
{
ElasticObject* obj = m_objects[objIndex];
// compute new factorization if no factorization can be reused
if (!foundFactorization)
initFactorization(obj->m_factorization, obj->m_particleIndices, obj->m_nFixed, dt, m_mu);
// init vectors
int numParticles = (int)obj->m_particleIndices.size();
#ifdef USE_AVX
obj->m_dx_avx.resize(numParticles - obj->m_nFixed);
obj->m_f_avx.resize(3 * numParticles);
obj->m_xk_avx.resize(numParticles);
obj->m_xTilde_avx.resize(numParticles);
#else
obj->m_dx.resize(numParticles - obj->m_nFixed);
obj->m_f.resize(3 * numParticles);
obj->m_xk.resize(numParticles);
obj->m_xTilde.resize(numParticles);
#endif
int nFree = numParticles - obj->m_nFixed;
#ifdef USE_AVX
obj->m_gradient_avx.resize(numParticles, Scalarf8(0.0f));
// L-BFGS buffers (Scalarf8 domain)
obj->m_lbfgs_s_avx.resize(m_lbfgsWindowSize);
obj->m_lbfgs_y_avx.resize(m_lbfgsWindowSize);
for (int w = 0; w < m_lbfgsWindowSize; w++)
{
obj->m_lbfgs_s_avx[w].resize(nFree, Scalarf8(0.0f));
obj->m_lbfgs_y_avx[w].resize(nFree, Scalarf8(0.0f));
}
obj->m_lbfgs_rho.resize(m_lbfgsWindowSize, 0);
obj->m_lbfgs_alpha.resize(m_lbfgsWindowSize, 0);
obj->m_lbfgs_last_sol_avx.resize(nFree, Scalarf8(0.0f));
obj->m_lbfgs_q_avx.resize(nFree, Scalarf8(0.0f));
obj->m_lbfgs_count = 0;
// Newton CG workspace (Scalarf8, coord-packed)
obj->m_pcg_r_avx.resize(nFree, Scalarf8(0.0f));
obj->m_pcg_p_avx.resize(nFree, Scalarf8(0.0f));
obj->m_pcg_Ap_avx.resize(nFree, Scalarf8(0.0f));
obj->m_pcg_z_avx.resize(nFree, Scalarf8(0.0f));
#else
obj->m_dx_perm.resize(numParticles - obj->m_nFixed);
obj->m_gradient.resize(numParticles, Vector3r::Zero());
// L-BFGS buffers
obj->m_lbfgs_s.resize(m_lbfgsWindowSize);
obj->m_lbfgs_y.resize(m_lbfgsWindowSize);
for (int w = 0; w < m_lbfgsWindowSize; w++)
{
obj->m_lbfgs_s[w].resize(nFree, Vector3r::Zero());
obj->m_lbfgs_y[w].resize(nFree, Vector3r::Zero());
}
obj->m_lbfgs_rho.resize(m_lbfgsWindowSize, 0);
obj->m_lbfgs_alpha.resize(m_lbfgsWindowSize, 0);
obj->m_lbfgs_last_sol.resize(nFree, Vector3r::Zero());
obj->m_lbfgs_q.resize(nFree, Vector3r::Zero());
obj->m_lbfgs_count = 0;
// Newton buffers (scalar PCG workspace)
obj->m_pcg_r.resize(nFree, Vector3r::Zero());
obj->m_pcg_p.resize(nFree, Vector3r::Zero());
obj->m_pcg_Ap.resize(nFree, Vector3r::Zero());
obj->m_pcg_z.resize(nFree, Vector3r::Zero());
#endif
// Shared Newton buffers (both builds)
obj->m_hessian9x9.resize(numParticles);
obj->m_pcg_precond.resize(nFree, Matrix3r::Identity());
// write cache file
if (sim->getUseCache() && (Utilities::FileSystem::makeDir(sim->getCachePath()) == 0))
{
BinaryFileWriter binWriter;
binWriter.openFile(cacheFileName);
binWriter.writeSparseMatrix(obj->m_factorization->m_D);
binWriter.writeSparseMatrix(obj->m_factorization->m_DT_K);
binWriter.write(obj->m_factorization->m_dt);
binWriter.write(obj->m_factorization->m_mu);
binWriter.writeSparseMatrix(obj->m_factorization->m_matHTH);
#ifdef USE_AVX
obj->m_factorization->m_cholesky->save(binWriter);
#else
binWriter.writeSparseMatrix(obj->m_factorization->m_matL);
binWriter.writeSparseMatrix(obj->m_factorization->m_matLT);
binWriter.writeMatrixX(obj->m_factorization->m_permInd);
binWriter.writeMatrixX(obj->m_factorization->m_permInvInd);
#endif
binWriter.closeFile();
}
}
obj->m_md5 = md5;
}
}
void Elasticity_Kee2023::initFactorization(std::shared_ptr<Factorization> factorization, std::vector<unsigned int>& particleIndices, const unsigned int nFixed, const Real dt, const Real mu)
{
Simulation* sim = Simulation::getCurrent();
const unsigned int fluidModelIndex = m_model->getPointSetIndex();
factorization->m_dt = dt;
factorization->m_mu = mu;
// init mapping to find the particle indices in the current particle group (object)
std::vector<unsigned int> groupInv;
groupInv.resize(m_model->numActiveParticles());
int numParticles = (int)particleIndices.size();
std::vector<unsigned int>& group = particleIndices;
for (int i = 0; i < numParticles; i++)
groupInv[group[i]] = i;
// determine total number of neighbors
int totalNeighbors = 0;
for (int i = 0; i < (int)numParticles; i++)
{
int particleIndex = group[i];
const int numNeighbors = (int) m_initialNeighbors[i].size();
totalNeighbors += numNeighbors;
}
// init triplets for matrices D, K, M
std::vector<Eigen::Triplet<double>> triplets_D;
std::vector<Eigen::Triplet<double>> triplets_K;
std::vector<Eigen::Triplet<double>> triplets_M;
triplets_D.reserve(2 * 3 * totalNeighbors);
triplets_K.reserve(3 * numParticles);
triplets_M.reserve(numParticles);
std::vector<Eigen::Triplet<double>> triplets_H;
triplets_H.reserve(totalNeighbors + numParticles);
std::vector<Eigen::Triplet<double>> triplets_K2;
triplets_K2.reserve(totalNeighbors);
const double dtd = dt;
const double mud = mu;
const double alphad = m_alpha;
// init matrices D and K
unsigned int row_index = 0;
for (int i = 0; i < (int)numParticles; i++)
{
int particleIndex0 = group[i];
const unsigned int i0 = particleIndex0;
unsigned int particleIndex = m_initial_to_current_index[i0];
const double restVolumes_id = m_restVolumes[i];
// init matrix K: constant approximation (2*mu + lambda) for L-BFGS H_0
const double lambdad = static_cast<double>(m_lambda);
const double Kreal = (2.0 * mud + lambdad) * dtd * dtd * restVolumes_id;
// init mass matrix
// all particles have the same mass => M = mass*I
triplets_M.push_back(Eigen::Triplet<double>(i, i, m_model->getMass(particleIndex)));
for (int j = 0; j < 3; j++)
triplets_K.push_back(Eigen::Triplet<double>(3 * i + j, 3 * i + j, Kreal));
const Eigen::Vector3d xi0d = m_model->getPosition0(i0).cast<double>();
const size_t numNeighbors = m_initialNeighbors[i0].size();
for (unsigned int j = 0; j < numNeighbors; j++)
{
const unsigned int neighborIndex0 = m_initialNeighbors[i0][j];
const unsigned int neighborIndex = m_initial_to_current_index[neighborIndex0];
const Eigen::Vector3d xj0d = m_model->getPosition0(neighborIndex0).cast<double>();
const Eigen::Vector3d correctedKernelGradient = m_L[particleIndex].cast<double>() * sim->gradW((xi0d - xj0d).cast<Real>()).cast<double>();
const double restVolumes_jd = m_restVolumes[neighborIndex];
const Eigen::Vector3d y = restVolumes_jd * correctedKernelGradient;
// init matrix D according to Eq. 3 and 4 in the supplemental document of the paper
triplets_D.push_back(Eigen::Triplet<double>(3 * i + 0, i, -y[0]));
triplets_D.push_back(Eigen::Triplet<double>(3 * i + 1, i, -y[1]));
triplets_D.push_back(Eigen::Triplet<double>(3 * i + 2, i, -y[2]));
triplets_D.push_back(Eigen::Triplet<double>(3 * i + 0, groupInv[neighborIndex0], y[0]));
triplets_D.push_back(Eigen::Triplet<double>(3 * i + 1, groupInv[neighborIndex0], y[1]));
triplets_D.push_back(Eigen::Triplet<double>(3 * i + 2, groupInv[neighborIndex0], y[2]));
double sum = 0.0;
const Eigen::Vector3d xi_xj_0 = xi0d - xj0d;
const double xixj0_l2 = xi_xj_0.squaredNorm();
const double beta = 1.0;
// init matrix \tilde K * dt^2 according to Eq. 2 in the paper
const double Kreal2 = alphad * dtd * dtd * mud * static_cast<double>(m_restVolumes[i]) * static_cast<double>(m_restVolumes[neighborIndex]) * (sim->W(xi_xj_0.cast<Real>()) / xixj0_l2);
triplets_K2.push_back(Eigen::Triplet<double>(row_index, row_index, Kreal2));
// init matrix H according to Eq. 5, 6 and 7 in the supplemental document of the paper
for (unsigned int k = 0; k < numNeighbors; k++)
{
const unsigned int kIndex0 = m_initialNeighbors[i0][k];
const unsigned int kIndex = m_initial_to_current_index[kIndex0];
const Eigen::Vector3d xk0d = m_model->getPosition0(kIndex0).cast<double>();
const Eigen::Vector3d correctedKernelGradientd = m_L[particleIndex].cast<double>() * sim->gradW((xi0d - xk0d).cast<Real>()).cast<double>();
const double restVolumes_kd = m_restVolumes[kIndex];
const double H3j = beta * restVolumes_kd * correctedKernelGradientd.dot(xi0d - xj0d);
triplets_H.push_back(Eigen::Triplet<double>(row_index, groupInv[kIndex0], H3j));
sum -= H3j;
if (k == j)
{
triplets_H.push_back(Eigen::Triplet<double>(row_index, groupInv[kIndex0], beta));
}
}
triplets_H.push_back(Eigen::Triplet<double>(row_index, i, sum - beta));
row_index++;
}
}
Eigen::SparseMatrix<double> D(3 * numParticles, numParticles);
factorization->m_D.resize(3 * numParticles, numParticles);
D.setFromTriplets(triplets_D.begin(), triplets_D.end());
factorization->m_D = D.cast<Real>();
//set matrices
Eigen::SparseMatrix<double> K(3 * numParticles, 3 * numParticles); // actually 2 * dt* dt * K
K.setFromTriplets(triplets_K.begin(), triplets_K.end());
Eigen::SparseMatrix<double> M(numParticles, numParticles);
M.setFromTriplets(triplets_M.begin(), triplets_M.end());
Eigen::SparseMatrix<double> DT_K(numParticles, 3 * numParticles);
factorization->m_DT_K.resize(numParticles, 3 * numParticles);
DT_K = D.transpose() * K;
factorization->m_DT_K = DT_K.cast<Real>();
Eigen::SparseMatrix<double> K2(totalNeighbors, totalNeighbors);
K2.setFromTriplets(triplets_K2.begin(), triplets_K2.end());
Eigen::SparseMatrix<double> H(totalNeighbors, numParticles);
H.setFromTriplets(triplets_H.begin(), triplets_H.end());
Eigen::SparseMatrix<double> HTH(numParticles, numParticles);
factorization->m_matHTH.resize(numParticles, numParticles);
HTH = H.transpose() * K2 * H;
factorization->m_matHTH = HTH.cast<Real>();
LOG_INFO << "Non zero elements (H^T * K * H): " << factorization->m_matHTH.nonZeros();
Eigen::SparseMatrix<double> DT_K_D = DT_K * D;
Eigen::SparseMatrix<double> M_plus_DT_K_D;
// init linear system matrix according to Eq. 29 in the paper
if (m_alpha != 0.0)
M_plus_DT_K_D = (M + DT_K_D + HTH).block(0, 0, numParticles - nFixed, numParticles - nFixed);
else // no zero energy mode control
M_plus_DT_K_D = (M + DT_K_D).block(0, 0, numParticles - nFixed, numParticles - nFixed);
M_plus_DT_K_D.makeCompressed();
#ifdef USE_AVX
// compute factorization of the matrix
delete factorization->m_cholesky;
factorization->m_cholesky = new CholeskyAVXSolver(M_plus_DT_K_D);
#else
SolverLLT* solverLLT = new SolverLLT();
solverLLT->compute(M_plus_DT_K_D);
if (solverLLT->info() != Eigen::Success)
{
LOG_ERR << "Cholesky decomposition failed.";
LOG_ERR << solverLLT->info();
return;
}
factorization->m_permInd = solverLLT->permutationP().indices();
factorization->m_permInvInd = solverLLT->permutationPinv().indices();
factorization->m_matL = Eigen::SparseMatrix<Real, Eigen::ColMajor>(solverLLT->matrixL().cast<Real>());
factorization->m_matLT = Eigen::SparseMatrix<Real, Eigen::ColMajor>(solverLLT->matrixU().cast<Real>());
delete solverLLT;
#endif
LOG_INFO << "Non zero elements (A): " << M_plus_DT_K_D.nonZeros();
}
void Elasticity_Kee2023::step()
{
// apply accelerations
const unsigned int numParticles = m_model->numActiveParticles();
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
if (m_model->getParticleState(i) == ParticleState::Active)
{
Vector3r& vel = m_model->getVelocity(i);
vel += dt * m_model->getAcceleration(i);
m_model->getAcceleration(i).setZero();
}
}
}
START_TIMING("Elasticity")
stepElasticitySolver(); // elasticity solver using the factorization
STOP_TIMING_AVG
}
void Elasticity_Kee2023::reset()
{
initValues();
}
void Elasticity_Kee2023::performNeighborhoodSearchSort()
{
const unsigned int numPart = m_model->numActiveParticles();
if (numPart == 0)
return;
Simulation *sim = Simulation::getCurrent();
auto const& d = sim->getNeighborhoodSearch()->point_set(m_model->getPointSetIndex());
d.sort_field(&m_rotations[0]);
d.sort_field(&m_current_to_initial_index[0]);
d.sort_field(&m_L[0]);
d.sort_field(&m_restVolumes[0]);
for (unsigned int i = 0; i < numPart; i++)
m_initial_to_current_index[m_current_to_initial_index[i]] = i;
}
void Elasticity_Kee2023::computeMatrixL()
{
Simulation *sim = Simulation::getCurrent();
const unsigned int numParticles = m_model->numActiveParticles();
const unsigned int fluidModelIndex = m_model->getPointSetIndex();
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = m_current_to_initial_index[i];
const Vector3r &xi0 = m_model->getPosition0(i0);
Matrix3r L;
L.setZero();
const size_t numNeighbors = m_initialNeighbors[i0].size();
// Fluid
for (unsigned int j = 0; j < numNeighbors; j++)
{
// get initial neighbor index considering the current particle order
const unsigned int neighborIndex0 = m_initialNeighbors[i0][j];
const unsigned int neighborIndex = m_initial_to_current_index[neighborIndex0];
const Vector3r &xj0 = m_model->getPosition0(neighborIndex0);
const Vector3r xj_xi_0 = xj0 - xi0;
const Vector3r gradW = sim->gradW(xj_xi_0);
// minus because gradW(xij0) == -gradW(xji0)
L -= m_restVolumes[neighborIndex] * gradW * xj_xi_0.transpose();
}
// add 1 to z-component. otherwise we get a singular matrix in 2D
if (sim->is2DSimulation())
L(2, 2) = 1.0;
bool invertible = false;
L.computeInverseWithCheck(m_L[i], invertible, static_cast<Real>(1e-9));
if (!invertible)
{
LOG_INFO << "Matrix not invertible.";
MathFunctions::pseudoInverse(L, m_L[i]);
//m_L[i] = Matrix3r::Identity();
}
}
}
}
void Elasticity_Kee2023::precomputeValues()
{
Simulation* sim = Simulation::getCurrent();
const int numParticles = (int)m_model->numActiveParticles();
// Scalar format (always filled — used by Newton CG in both builds)
m_precomputed_indices.clear();
m_precomp_V_gradW.clear();
m_precomputed_indices.resize(numParticles);
unsigned int sumNeighbors = 0;
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = m_current_to_initial_index[i];
const size_t numNeighbors = m_initialNeighbors[i0].size();
m_precomputed_indices[i] = sumNeighbors;
sumNeighbors += numNeighbors;
}
m_precomp_V_gradW.resize(sumNeighbors);
#pragma omp parallel for schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = m_current_to_initial_index[i];
const Vector3r xi0 = m_model->getPosition0(i0);
const unsigned int numNeighbors = (unsigned int)m_initialNeighbors[i0].size();
unsigned int base = m_precomputed_indices[i];
for (unsigned int j = 0; j < numNeighbors; j++)
{
const unsigned int neighborIndex0 = m_initialNeighbors[i0][j];
const unsigned int neighborCurrent = m_initial_to_current_index[neighborIndex0];
const Vector3r xj0 = m_model->getPosition0(neighborIndex0);
const Real V_j = m_restVolumes[neighborCurrent];
m_precomp_V_gradW[base + j] = V_j * sim->gradW(xi0 - xj0);
}
}
#ifdef USE_AVX
// AVX-packed format (additionally filled for L-BFGS force loops)
m_precomputed_indices8.clear();
m_precomp_V_gradW8.clear();
m_precomputed_indices8.resize(numParticles);
unsigned int sumBlocks = 0;
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = m_current_to_initial_index[i];
const size_t numNeighbors = m_initialNeighbors[i0].size();
m_precomputed_indices8[i] = sumBlocks;
sumBlocks += (unsigned int)numNeighbors / 8u;
if (numNeighbors % 8 != 0)
sumBlocks++;
}
m_precomp_V_gradW8.resize(sumBlocks);
#pragma omp parallel for schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = m_current_to_initial_index[i];
const Vector3r xi0 = m_model->getPosition0(i0);
const unsigned int numNeighbors = (unsigned int)m_initialNeighbors[i0].size();
const Vector3f8 xi0_avx(xi0.cast<float>());
unsigned int base8 = m_precomputed_indices8[i];
unsigned int idx = 0;
for (unsigned int j = 0; j < numNeighbors; j += 8)
{
const unsigned int count = std::min(numNeighbors - j, 8u);
unsigned int nIndices[8];
for (auto k = 0u; k < count; k++)
nIndices[k] = m_initial_to_current_index[m_initialNeighbors[i0][j + k]];
const Scalarf8 Vj_avx = convert_zero(nIndices, &m_restVolumes[0], count);
const Vector3f8 xj0_avx = convertVec_zero(&m_initialNeighbors[i0][j], &m_model->getPosition0(0), count);
const Vector3f8 gradW_avx = CubicKernel_AVX::gradW(xi0_avx - xj0_avx);
m_precomp_V_gradW8[base8 + idx] = gradW_avx * Vj_avx;
idx++;
}
}
#endif
}
void Elasticity_Kee2023::saveState(BinaryFileWriter &binWriter)
{
binWriter.writeBuffer((char*)m_current_to_initial_index.data(), m_current_to_initial_index.size() * sizeof(unsigned int));
binWriter.writeBuffer((char*)m_initial_to_current_index.data(), m_initial_to_current_index.size() * sizeof(unsigned int));
binWriter.writeBuffer((char*)m_L.data(), m_L.size() * sizeof(Matrix3r));
binWriter.writeBuffer((char*)m_rotations.data(), m_rotations.size() * sizeof(Matrix3r));
}
void Elasticity_Kee2023::loadState(BinaryFileReader &binReader)
{
binReader.readBuffer((char*)m_current_to_initial_index.data(), m_current_to_initial_index.size() * sizeof(unsigned int));
binReader.readBuffer((char*)m_initial_to_current_index.data(), m_initial_to_current_index.size() * sizeof(unsigned int));
binReader.readBuffer((char*)m_L.data(), m_L.size() * sizeof(Matrix3r));
binReader.readBuffer((char*)m_rotations.data(), m_rotations.size() * sizeof(Matrix3r));
}
void Elasticity_Kee2023::computeXTilde(ElasticObject* obj)
{
const std::vector<unsigned int>& group = obj->m_particleIndices;
const int numParticles = (int)group.size();
const Real dt = obj->m_factorization->m_dt;
#ifdef USE_AVX
auto& xTilde = obj->m_xTilde_avx;
#else
auto& xTilde = obj->m_xTilde;
#endif
#pragma omp parallel for schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Vector3r xT = m_model->getPosition(particleIndex) + dt * m_model->getVelocity(particleIndex);
#ifdef USE_AVX
xTilde[i] = Scalarf8((float)xT[0], (float)xT[1], (float)xT[2], 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
#else
xTilde[i] = xT;
#endif
}
}
void Elasticity_Kee2023::updateVelocity(ElasticObject* obj, Real fdt)
{
const std::vector<unsigned int>& group = obj->m_particleIndices;
const int numParticles = (int)group.size();
#ifdef USE_AVX
const auto& xk = obj->m_xk_avx;
#else
const auto& xk = obj->m_xk;
#endif
const Real damping = static_cast<Real>(0.0);
const Real invFdt = (1 - damping) / fdt;
#pragma omp parallel for schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
if (m_model->getParticleState(particleIndex) == ParticleState::Active)
{
const Vector3r& x = m_model->getPosition(particleIndex);
#ifdef USE_AVX
float v[8];
xk[i].store(v);
const Vector3r xk_i((Real)v[0], (Real)v[1], (Real)v[2]);
m_model->getVelocity(particleIndex) = invFdt * (xk_i - x);
#else
m_model->getVelocity(particleIndex) = invFdt * (xk[i] - x);
#endif
}
}
}
#ifdef USE_AVX
Real Elasticity_Kee2023::computeEnergy(ElasticObject* obj)
{
const std::vector<unsigned int>& group = obj->m_particleIndices;
int numParticles = (int)group.size();
auto& D = obj->m_factorization->m_D;
auto& HT_K_H = obj->m_factorization->m_matHTH;
auto& f = obj->m_f_avx; // Scalarf8 (3*numParticles entries)
auto& xk = obj->m_xk_avx; // Scalarf8 (numParticles entries)
auto& xTilde = obj->m_xTilde_avx;
const Real fdt = obj->m_factorization->m_dt;
Real elasticEnergy = 0;
Real massEnergy = 0;
// F = D * xk (coord-packed AVX sparse matvec)
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < 3 * numParticles; i++)
f[i].setZero();
#pragma omp for schedule(static)
for (int k = 0; k < D.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(D, k); it; ++it)
f[it.row()] += Scalarf8((float)it.value()) * xk[it.col()];
}
// Elastic energy: sum V_i * Psi(F_i)
#pragma omp for reduction(+:elasticEnergy) schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
float x0[8], x1[8], x2[8];
f[3 * i].store(x0);
f[3 * i + 1].store(x1);
f[3 * i + 2].store(x2);
Matrix3r Fi;
Fi << x0[0], x0[1], x0[2],
x1[0], x1[1], x1[2],
x2[0], x2[1], x2[2];
Eigen::JacobiSVD<Matrix3r> svd_i(Fi, Eigen::ComputeFullU | Eigen::ComputeFullV);
Matrix3r U = svd_i.matrixU();
if ((U * svd_i.matrixV().transpose()).determinant() < 0)
U.col(2) = -U.col(2);
Matrix3r Ri = U * svd_i.matrixV().transpose();
elasticEnergy += m_restVolumes[particleIndex] * computePsi(Fi, Ri);
}
}
// Inertial energy: (1/2) m_i ||xk_i - xTilde_i||^2
// Vertical accumulation: each thread keeps a Scalarf8 acc, reduces once at the end.
#pragma omp parallel reduction(+:massEnergy)
{
Scalarf8 acc; acc.setZero();
#pragma omp for schedule(static) nowait
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Real mass = m_model->getMass(particleIndex);
const Scalarf8 diff = xk[i] - xTilde[i];
acc += Scalarf8(static_cast<float>(0.5 * mass)) * diff * diff;
}
massEnergy += (Real)acc.reduce();
}
// Zero-energy mode: (1/2) xk^T HTH xk
Real zeroEnergy = 0;
if (m_alpha != 0.0)
{
#pragma omp parallel for reduction(+:zeroEnergy) schedule(static)
for (int k = 0; k < HT_K_H.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::InnerIterator it(HT_K_H, k); it; ++it)
zeroEnergy += it.value() * (Real)(xk[it.row()] * xk[it.col()]).reduce();
}
zeroEnergy *= static_cast<Real>(0.5);
}
return massEnergy + fdt * fdt * elasticEnergy + zeroEnergy;
}
#else
Real Elasticity_Kee2023::computeEnergy(ElasticObject* obj)
{
const std::vector<unsigned int>& group = obj->m_particleIndices;
int numParticles = (int)group.size();
auto& D = obj->m_factorization->m_D;
auto& HT_K_H = obj->m_factorization->m_matHTH;
auto& f = obj->m_f;
auto& xk = obj->m_xk;
const Real fdt = obj->m_factorization->m_dt;
Real elasticEnergy = 0;
Real massEnergy = 0;
// F = D * xk
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
f[3 * i].setZero();
f[3 * i + 1].setZero();
f[3 * i + 2].setZero();
}
#pragma omp for schedule(static)
for (int k = 0; k < D.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(D, k); it; ++it)
f[it.row()] += it.value() * xk[it.col()];
}
// Elastic energy: sum V_i * Psi(F_i)
#pragma omp for reduction(+:elasticEnergy) schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
Matrix3r Fi;
Fi << f[3 * i][0], f[3 * i][1], f[3 * i][2],
f[3 * i + 1][0], f[3 * i + 1][1], f[3 * i + 1][2],
f[3 * i + 2][0], f[3 * i + 2][1], f[3 * i + 2][2];
Eigen::JacobiSVD<Matrix3r> svd_i(Fi, Eigen::ComputeFullU | Eigen::ComputeFullV);
Matrix3r U = svd_i.matrixU();
if ((U * svd_i.matrixV().transpose()).determinant() < 0)
U.col(2) = -U.col(2);
Matrix3r Ri = U * svd_i.matrixV().transpose();
elasticEnergy += m_restVolumes[particleIndex] * computePsi(Fi, Ri);
}
}
// Inertial energy: (1/2) m_i ||xk_i - xTilde_i||^2
auto& xTilde = obj->m_xTilde;
#pragma omp parallel for reduction(+:massEnergy) schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Real mass = m_model->getMass(particleIndex);
massEnergy += static_cast<Real>(0.5) * mass * (xk[i] - xTilde[i]).squaredNorm();
}
// Zero-energy mode: (1/2) xk^T HTH xk
Real zeroEnergy = 0;
if (m_alpha != 0.0)
{
#pragma omp parallel for reduction(+:zeroEnergy) schedule(static)
for (int k = 0; k < HT_K_H.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::InnerIterator it(HT_K_H, k); it; ++it)
zeroEnergy += it.value() * xk[it.row()].dot(xk[it.col()]);
}
zeroEnergy *= static_cast<Real>(0.5);
}
return massEnergy + fdt * fdt * elasticEnergy + zeroEnergy;
}
#endif
#ifdef USE_AVX
Real Elasticity_Kee2023::computeEnergyAndGradient(ElasticObject* obj)
{
Simulation *sim = Simulation::getCurrent();
const std::vector<unsigned int>& group = obj->m_particleIndices;
int numParticles = (int)group.size();
auto& D = obj->m_factorization->m_D;
auto& HT_K_H = obj->m_factorization->m_matHTH;
auto& gradient = obj->m_gradient_avx; // Scalarf8
auto& f = obj->m_f_avx; // Scalarf8 (3*numParticles)
auto& xk = obj->m_xk_avx; // Scalarf8
auto& xTilde = obj->m_xTilde_avx; // Scalarf8
const Real fdt = obj->m_factorization->m_dt;
Real elasticEnergy = 0;
Real massEnergy = 0;
// 1. F = D * xk (coord-packed AVX sparse matvec, Kugelstadt Pattern A)
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < 3 * numParticles; i++)
f[i].setZero();
#pragma omp for schedule(static)
for (int k = 0; k < D.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(D, k); it; ++it)
f[it.row()] += Scalarf8((float)it.value()) * xk[it.col()];
}
#pragma omp for schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
float x0[8], x1[8], x2[8];
f[3 * i].store(x0);
f[3 * i + 1].store(x1);
f[3 * i + 2].store(x2);
m_F[particleIndex] << x0[0], x0[1], x0[2],
x1[0], x1[1], x1[2],
x2[0], x2[1], x2[2];
}
}
// 2. Polar decomposition via SVD (scalar — no AVX SVD available).
// Precompute P^T * L for each particle (stored in m_PL).
// Accumulate elastic energy density Psi(F_i) * V_i.
#pragma omp parallel default(shared)
{
#pragma omp for reduction(+:elasticEnergy) schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
Eigen::JacobiSVD<Matrix3r> svd_i(m_F[particleIndex], Eigen::ComputeFullU | Eigen::ComputeFullV);
Matrix3r U = svd_i.matrixU();
if ((U * svd_i.matrixV().transpose()).determinant() < 0)
U.col(2) = -U.col(2);
m_rotations[particleIndex] = U * svd_i.matrixV().transpose();
const Matrix3r P_i = computeP(m_F[particleIndex], m_rotations[particleIndex]);
m_PL[particleIndex] = P_i.transpose() * m_L[particleIndex];
elasticEnergy += m_restVolumes[particleIndex] * computePsi(m_F[particleIndex], m_rotations[particleIndex]);
gradient[i].setZero();
}
}
// 3. Elastic gradient (scalar neighbor loop; write Scalarf8 gradient).
// g_i = -dt^2 * V_i * sum_j V_j * (P_i^T L_i + P_j^T L_j) * gradW
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Vector3r xi0 = m_model->getPosition0(i0);
const Real V_i = m_restVolumes[particleIndex];
const Matrix3r& PtL_i = m_PL[particleIndex];
const unsigned int numNeighbors = (unsigned int)m_initialNeighbors[i0].size();
const Matrix3f8 PtLi_avx(m_PL[particleIndex].cast<float>());
Vector3f8 force_avx;
force_avx.setZero();
for (unsigned int j = 0; j < numNeighbors; j += 8)
{
const unsigned int count = std::min(numNeighbors - j, 8u);
unsigned int nIndices[8];
for (auto k = 0u; k < count; k++)
nIndices[k] = m_initial_to_current_index[m_initialNeighbors[i0][j + k]];
const Matrix3f8 PtLj_avx = convertMat_zero(nIndices, &m_PL[0], count);
const Vector3f8& V_gradW = m_precomp_V_gradW8[m_precomputed_indices8[particleIndex] + j / 8];
force_avx += PtLi_avx * V_gradW + PtLj_avx * V_gradW;
}
Vector3r force;
force[0] = force_avx.x().reduce();
force[1] = force_avx.y().reduce();
force[2] = force_avx.z().reduce();
const Vector3r g = -(fdt * fdt * V_i * force);
gradient[i] = Scalarf8((float)g[0], (float)g[1], (float)g[2], 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
}
}
// 4. Mass term: g_i += m_i * (xk_i - xTilde_i)
// Vertical accumulation for massEnergy (one .reduce() per thread).
#pragma omp parallel reduction(+:massEnergy)
{
Scalarf8 acc; acc.setZero();
#pragma omp for schedule(static) nowait
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Real mass = m_model->getMass(particleIndex);
const Scalarf8 diff = xk[i] - xTilde[i];
gradient[i] += Scalarf8((float)mass) * diff;
acc += Scalarf8(static_cast<float>(0.5 * mass)) * diff * diff;
}
massEnergy += (Real)acc.reduce();
}
// 5. Zero-energy mode: g += H^T * K2 * H * xk (coord-packed AVX sparse matvec)
Real zeroEnergy = 0;
if (m_alpha != 0.0)
{
#pragma omp parallel default(shared)
{
#pragma omp for reduction(+:zeroEnergy) schedule(static)
for (int k = 0; k < HT_K_H.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::InnerIterator it(HT_K_H, k); it; ++it)
{
zeroEnergy += it.value() * (Real)(xk[it.row()] * xk[it.col()]).reduce();
gradient[it.col()] += Scalarf8((float)it.value()) * xk[it.row()];
}
}
}
zeroEnergy *= static_cast<Real>(0.5);
}
return massEnergy + fdt * fdt * elasticEnergy + zeroEnergy;
}
#else
Real Elasticity_Kee2023::computeEnergyAndGradient(ElasticObject* obj)
{
Simulation *sim = Simulation::getCurrent();
const std::vector<unsigned int>& group = obj->m_particleIndices;
int numParticles = (int)group.size();
auto& D = obj->m_factorization->m_D;
auto& HT_K_H = obj->m_factorization->m_matHTH;
auto& gradient = obj->m_gradient;
auto& f = obj->m_f;
auto& xk = obj->m_xk;
const Real fdt = obj->m_factorization->m_dt;
Real elasticEnergy = 0;
Real massEnergy = 0;
// 1. Compute deformation gradient: F = D * xk -> m_F
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < numParticles; i++)
{
f[3 * i].setZero();
f[3 * i + 1].setZero();
f[3 * i + 2].setZero();
}
#pragma omp for schedule(static)
for (int k = 0; k < D.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(D, k); it; ++it)
f[it.row()] += it.value() * xk[it.col()];
}
#pragma omp for schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
m_F[particleIndex] << f[3 * i][0], f[3 * i][1], f[3 * i][2],
f[3 * i + 1][0], f[3 * i + 1][1], f[3 * i + 1][2],
f[3 * i + 2][0], f[3 * i + 2][1], f[3 * i + 2][2];
}
}
// 2. Extract rotation R from F via polar decomposition
// Precompute P^T * L for each particle (stored in m_PL)
// Accumulate elastic energy density Psi(F_i) * V_i
#pragma omp parallel default(shared)
{
#pragma omp for reduction(+:elasticEnergy) schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
Eigen::JacobiSVD<Matrix3r> svd_i(m_F[particleIndex], Eigen::ComputeFullU | Eigen::ComputeFullV);
Matrix3r U = svd_i.matrixU();
if ((U * svd_i.matrixV().transpose()).determinant() < 0)
U.col(2) = -U.col(2);
m_rotations[particleIndex] = U * svd_i.matrixV().transpose();
const Matrix3r P_i = computeP(m_F[particleIndex], m_rotations[particleIndex]);
m_PL[particleIndex] = P_i.transpose() * m_L[particleIndex];
elasticEnergy += m_restVolumes[particleIndex] * computePsi(m_F[particleIndex], m_rotations[particleIndex]);
gradient[i].setZero();
}
}
// 3. Elastic gradient: g_i = -dt^2 * V_i * sum_j V_j * (P_i^T L_i + P_j^T L_j) * gradW
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Vector3r xi0 = m_model->getPosition0(i0);
const Real V_i = m_restVolumes[particleIndex];
const Matrix3r& PtL_i = m_PL[particleIndex];
const size_t numNeighbors = m_initialNeighbors[i0].size();
Vector3r force;
force.setZero();
for (unsigned int j = 0; j < numNeighbors; j++)
{
const unsigned int neighborIndex0 = m_initialNeighbors[i0][j];
const unsigned int neighborCurrent = m_initial_to_current_index[neighborIndex0];
const Matrix3r& PtL_j = m_PL[neighborCurrent];
force += (PtL_i + PtL_j) * m_precomp_V_gradW[m_precomputed_indices[particleIndex] + j];
}
gradient[i] = -(fdt * fdt * V_i * force);
}
}
// 4. Mass term: g_i += m_i * (xk_i - xTilde_i)
auto& xTilde = obj->m_xTilde;
#pragma omp parallel default(shared)
{
#pragma omp for reduction(+:massEnergy) schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Real mass = m_model->getMass(particleIndex);
gradient[i] += mass * (xk[i] - xTilde[i]);
massEnergy += static_cast<Real>(0.5) * mass * (xk[i] - xTilde[i]).squaredNorm();
}
}
// 5. Zero-energy mode control: g += H^T * K2 * H * xk
Real zeroEnergy = 0;
std::vector<Vector3r, Eigen::aligned_allocator<Vector3r>> gradient_ze(numParticles, Vector3r::Zero());
if (m_alpha != 0.0)
{
#pragma omp parallel default(shared)
{
#pragma omp for reduction(+:zeroEnergy) schedule(static)
for (int k = 0; k < HT_K_H.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::InnerIterator it(HT_K_H, k); it; ++it)
{
zeroEnergy += it.value() * xk[it.row()].dot(xk[it.col()]);
gradient_ze[it.col()] += it.value() * xk[it.row()];
}
}
}
zeroEnergy *= static_cast<Real>(0.5);
for (int i = 0; i < (int)numParticles; i++)
gradient[i] += gradient_ze[i];
}
return massEnergy + fdt * fdt * elasticEnergy + zeroEnergy;
}
#endif
void Elasticity_Kee2023::computeHessian(ElasticObject* obj)
{
if (m_materialType == ENUM_MATERIAL_STABLE_NEOHOOKEAN)
computeStableNeoHookeanHessian9x9(obj);
else // Co-rotated
computeCorotatedHessian9x9(obj);
}
void Elasticity_Kee2023::computeCorotatedHessian9x9(ElasticObject* obj)
{
const std::vector<unsigned int>& group = obj->m_particleIndices;
const int numParticles = (int)group.size();
const Real dt = obj->m_factorization->m_dt;
const Real dt2 = dt * dt;
const Real sqrt2inv = static_cast<Real>(1.0) / std::sqrt(static_cast<Real>(2.0));
#pragma omp parallel for schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Matrix3r& Fi = m_F[particleIndex];
// SVD: F = U * Σ * V^T
Eigen::JacobiSVD<Matrix3r> svd_h(Fi, Eigen::ComputeFullU | Eigen::ComputeFullV);
Matrix3r U = svd_h.matrixU();
Matrix3r V = svd_h.matrixV();
Vector3r sigma = svd_h.singularValues();
// Ensure proper rotation (det = +1)
if ((U * V.transpose()).determinant() < 0)
{
U.col(2) = -U.col(2);
sigma[2] = -sigma[2];
}
// ======== ARAP Hessian: H = Σₖ ⟨λₖ⟩₊ qₖqₖᵀ ========
// Eigenvalues:
// twist modes: λ₀ = 1 - 2/(σ₀+σ₁), λ₁ = 1 - 2/(σ₁+σ₂), λ₂ = 1 - 2/(σ₀+σ₂)
// flip/scaling: λ₃..λ₈ = 1
Real lambda[3];
lambda[0] = 1 - 2 / (sigma[0] + sigma[1]);
lambda[1] = 1 - 2 / (sigma[1] + sigma[2]);
lambda[2] = 1 - 2 / (sigma[0] + sigma[2]);
// Clamp negative eigenvalues to 0 (PSD projection)
for (int k = 0; k < 3; k++)
if (lambda[k] < 0) lambda[k] = 0;
// Eigenvectors as vec(uᵢvⱼᵀ ± uⱼvᵢᵀ) / sqrt(2)
// Twist modes (skew-symmetric)
Matrix3r T0 = sqrt2inv * (U.col(0) * V.col(1).transpose() - U.col(1) * V.col(0).transpose());
Matrix3r T1 = sqrt2inv * (U.col(1) * V.col(2).transpose() - U.col(2) * V.col(1).transpose());
Matrix3r T2 = sqrt2inv * (U.col(0) * V.col(2).transpose() - U.col(2) * V.col(0).transpose());
// Flip modes (symmetric off-diagonal)
Matrix3r F0 = sqrt2inv * (U.col(0) * V.col(1).transpose() + U.col(1) * V.col(0).transpose());
Matrix3r F1 = sqrt2inv * (U.col(1) * V.col(2).transpose() + U.col(2) * V.col(1).transpose());
Matrix3r F2 = sqrt2inv * (U.col(0) * V.col(2).transpose() + U.col(2) * V.col(0).transpose());
// Scaling modes (diagonal)
Matrix3r S0 = U.col(0) * V.col(0).transpose();
Matrix3r S1 = U.col(1) * V.col(1).transpose();
Matrix3r S2 = U.col(2) * V.col(2).transpose();
// Vectorize (column-major)
auto vecMap = [](const Matrix3r& M) {
Eigen::Matrix<Real, 9, 1> v;
v << M(0,0), M(1,0), M(2,0), M(0,1), M(1,1), M(2,1), M(0,2), M(1,2), M(2,2);
return v;
};
Eigen::Matrix<Real, 9, 1> q[9];
q[0] = vecMap(T0); q[1] = vecMap(T1); q[2] = vecMap(T2);
q[3] = vecMap(F0); q[4] = vecMap(F1); q[5] = vecMap(F2);
q[6] = vecMap(S0); q[7] = vecMap(S1); q[8] = vecMap(S2);
// Reconstruct ARAP: H = 2μ Σₖ λₖ qₖqₖᵀ (flip/scaling have λ=1)
const Real mu2 = static_cast<Real>(2.0) * m_mu;
Eigen::Matrix<Real, 9, 9> K = Eigen::Matrix<Real, 9, 9>::Zero();
for (int k = 0; k < 3; k++)
K += (mu2 * lambda[k]) * q[k] * q[k].transpose();
for (int k = 3; k < 9; k++)
K += mu2 * q[k] * q[k].transpose();
// ======== Volume Hessian: λ * vec(R) * vec(R)^T ========
// d²/dvecF² of (λ/2)(tr(R^T F) - 3)² = λ * R ⊗ R (always PSD)
const Matrix3r Ri = U * V.transpose();
Eigen::Matrix<Real, 9, 1> vecR;
vecR << Ri(0,0), Ri(1,0), Ri(2,0), Ri(0,1), Ri(1,1), Ri(2,1), Ri(0,2), Ri(1,2), Ri(2,2);
K += m_lambda * vecR * vecR.transpose();
// Store unscaled material Hessian
// The dt² * Vi scaling is applied in the symmetric matvec
obj->m_hessian9x9[i] = K;
}
}
void Elasticity_Kee2023::computeStableNeoHookeanHessian9x9(ElasticObject* obj)
{
const std::vector<unsigned int>& group = obj->m_particleIndices;
const int numParticles = (int)group.size();
const Real sqrt2inv = static_cast<Real>(1.0) / std::sqrt(static_cast<Real>(2.0));
const Real eps_tol = static_cast<Real>(1e-12);
#pragma omp parallel for schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Matrix3r& Fi = m_F[particleIndex];
// SVD: F = U * Σ * V^T
Eigen::JacobiSVD<Matrix3r> svd_h(Fi, Eigen::ComputeFullU | Eigen::ComputeFullV);
Matrix3r U = svd_h.matrixU();
Matrix3r V = svd_h.matrixV();
Vector3r sigma = svd_h.singularValues();
// Ensure proper rotation (det = +1)
if ((U * V.transpose()).determinant() < 0)
{
U.col(2) = -U.col(2);
sigma[2] = -sigma[2];
}
const Real s0 = sigma[0], s1 = sigma[1], s2 = sigma[2];
const Real I_C = s0*s0 + s1*s1 + s2*s2;
const Real J = s0 * s1 * s2;
// Scalars
const Real IC1 = I_C + 1;
const Real mu_T = m_mu * (1 - 1 / IC1); // Tikhonov coefficient
const Real mu_F = 2 * m_mu / (IC1 * IC1); // F rank-1 coefficient
const Real gamma_H = m_lambda * (J - 1) - static_cast<Real>(0.75) * m_mu; // volume coefficient
// Vectorize helper
auto vecMap = [](const Matrix3r& M) {
Eigen::Matrix<Real, 9, 1> v;
v << M(0,0), M(1,0), M(2,0), M(0,1), M(1,1), M(2,1), M(0,2), M(1,2), M(2,2);
return v;
};
// ======== Term 1: μ_T * I₉ (always PSD) ========
Eigen::Matrix<Real, 9, 9> K = mu_T * Eigen::Matrix<Real, 9, 9>::Identity();
// ======== Term 2: μ_F * vec(F) ⊗ vec(F) (rank-1, always PSD) ========
Eigen::Matrix<Real, 9, 1> vecF = vecMap(Fi);
K += mu_F * vecF * vecF.transpose();
// ======== Term 3: λ * vec(cof) ⊗ vec(cof) (rank-1, always PSD) ========
Vector3r c0 = Fi.col(1).cross(Fi.col(2));
Vector3r c1 = Fi.col(2).cross(Fi.col(0));
Vector3r c2 = Fi.col(0).cross(Fi.col(1));
Eigen::Matrix<Real, 9, 1> vecCof;
vecCof << c0[0], c0[1], c0[2], c1[0], c1[1], c1[2], c2[0], c2[1], c2[2];
K += m_lambda * vecCof * vecCof.transpose();
// ======== Term 4: γ_H * H_vol (volume Hessian with PSD projection) ========
// Same eigendecomposition as Co-rotated volume term
// Group 1: 6 eigenvalues ±σᵢ
Real volEig[6] = { s0, -s0, s1, -s1, s2, -s2 };
Matrix3r volD[6];
volD[0] << 0,0,0, 0,0,1, 0,-1,0;
volD[1] << 0,0,0, 0,0,1, 0,1,0;
volD[2] << 0,0,1, 0,0,0, -1,0,0;
volD[3] << 0,0,1, 0,0,0, 1,0,0;
volD[4] << 0,1,0, -1,0,0, 0,0,0;
volD[5] << 0,1,0, 1,0,0, 0,0,0;
for (int k = 0; k < 6; k++)
{
Real scaled = gamma_H * volEig[k];
if (scaled > 0)
{
Matrix3r Q = sqrt2inv * U * volD[k] * V.transpose();
Eigen::Matrix<Real, 9, 1> qv = vecMap(Q);
K += scaled * qv * qv.transpose();
}
}
// Group 2: 3 eigenvalues from depressed cubic
if (I_C > eps_tol)
{
const Real sqrtIC3 = std::sqrt(I_C / static_cast<Real>(3.0));
const Real cosArg = static_cast<Real>(3.0) * J / I_C * std::sqrt(static_cast<Real>(3.0) / I_C);
const Real clampedCos = std::max(static_cast<Real>(-1.0), std::min(static_cast<Real>(1.0), cosArg));
const Real theta = std::acos(clampedCos);
const Real pi = static_cast<Real>(3.14159265358979323846);
for (int k = 0; k < 3; k++)
{
Real eps_k = static_cast<Real>(2.0) * sqrtIC3 * std::cos((theta + static_cast<Real>(2.0) * pi * k) / static_cast<Real>(3.0));
Real scaled = gamma_H * eps_k;
if (scaled > 0)
{
Vector3r diag_k(s0*s2 + s1*eps_k, s1*s2 + s0*eps_k, eps_k*eps_k - s2*s2);
Real norm_k = diag_k.norm();
if (norm_k > eps_tol)
{
diag_k /= norm_k;
Matrix3r Q = U * diag_k.asDiagonal() * V.transpose();
Eigen::Matrix<Real, 9, 1> qv = vecMap(Q);
K += scaled * qv * qv.transpose();
}
}
}
}
obj->m_hessian9x9[i] = K;
}
}
void Elasticity_Kee2023::computeNewtonPreconditioner(ElasticObject* obj)
{
const std::vector<unsigned int>& group = obj->m_particleIndices;
const int numParticles = (int)group.size();
const int nFree = numParticles - (int)obj->m_nFixed;
auto& D = obj->m_factorization->m_D;
auto& HTH = obj->m_factorization->m_matHTH;
auto& precond = obj->m_pcg_precond;
// Initialize diagonal blocks with mass
#pragma omp parallel for schedule(static)
for (int j = 0; j < nFree; j++)
{
const unsigned int j0 = group[j];
const unsigned int pj = m_initial_to_current_index[j0];
const Real mj = m_model->getMass(pj);
precond[j] = Matrix3r::Identity() * mj;
}
// Add HTH diagonal (zero-energy mode control)
if (m_alpha != static_cast<Real>(0.0))
{
for (int outer = 0; outer < HTH.outerSize(); outer++)
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::InnerIterator it(HTH, outer); it; ++it)
{
const int j = (int)it.row();
if (j >= nFree) continue;
if (it.row() == it.col()) // diagonal only
{
for (int b = 0; b < 3; b++)
precond[j](b, b) += it.value();
}
}
}
// Add elastic contribution: for each particle i, accumulate dt² * V_i * d_j^T K_i d_j to the j-th diagonal block
// K_i is the unscaled material Hessian - dt² * V_i scaling is applied here
const Real dt = obj->m_factorization->m_dt;
const Real dt2 = dt * dt;
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Real V_i = m_restVolumes[particleIndex];
const Real scale = dt2 * V_i; // Scale factor for this particle's contribution
const Eigen::Matrix<Real, 9, 9>& Ki = obj->m_hessian9x9[i];
// Gather D values for particle i
int nCols = 0;
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(D, 3 * i); it; ++it)
nCols++;
std::vector<int> cols(nCols);
std::vector<Real> dv0(nCols), dv1(nCols), dv2(nCols);
int idx = 0;
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(D, 3 * i); it; ++it)
{ cols[idx] = (int)it.col(); dv0[idx] = it.value(); idx++; }
idx = 0;
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(D, 3 * i + 1); it; ++it)
{ dv1[idx] = it.value(); idx++; }
idx = 0;
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(D, 3 * i + 2); it; ++it)
{ dv2[idx] = it.value(); idx++; }
// For each neighbor j, compute contribution to precond[j]
for (int ji = 0; ji < nCols; ji++)
{
const int j = cols[ji];
if (j >= nFree) continue;
// d_j is a 9-vector: d_j[3b+a] = D[3i+b, j] * delta(a matches column index)
// Simplified: d_j^T K_i d_j is a 3x3 block
const Real dv[3] = { dv0[ji], dv1[ji], dv2[ji] };
// Compute 3x3 block: dt² * V_i * Σ_{b,c} dv[b] * K_i[3b+a, 3c+a'] * dv[c] for each (a,a')
for (int a = 0; a < 3; a++)
for (int ap = 0; ap < 3; ap++)
{
Real val = 0;
for (int b = 0; b < 3; b++)
for (int c = 0; c < 3; c++)
val += dv[b] * Ki(3 * b + a, 3 * c + ap) * dv[c];
precond[j](a, ap) += scale * val;
}
}
}
// Invert each 3x3 block
#pragma omp parallel for schedule(static)
for (int j = 0; j < nFree; j++)
precond[j] = precond[j].inverse();
}
#ifdef USE_AVX
void Elasticity_Kee2023::newtonMatvec(ElasticObject* obj)
{
const std::vector<unsigned int>& group = obj->m_particleIndices;
const int numParticles = (int)group.size();
const int nFree = numParticles - (int)obj->m_nFixed;
auto& p_avx = obj->m_pcg_p_avx;
auto& Ap_avx = obj->m_pcg_Ap_avx;
const Real dt = obj->m_factorization->m_dt;
const Real dt2 = dt * dt;
auto& D = obj->m_factorization->m_D;
auto& HTH = obj->m_factorization->m_matHTH;
auto& f_avx = obj->m_f_avx;
// Step 1: Ap_avx = M * p_avx
#pragma omp parallel for schedule(static)
for (int j = 0; j < nFree; j++)
{
const unsigned int j0 = group[j];
const unsigned int pj = m_initial_to_current_index[j0];
Ap_avx[j] = Scalarf8((float)m_model->getMass(pj)) * p_avx[j];
}
// Step 2: Ap_avx += HTH * p_avx
if (m_alpha != static_cast<Real>(0.0))
{
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int k = 0; k < HTH.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::InnerIterator it(HTH, k); it; ++it)
{
const int row = (int)it.row();
const int col = (int)it.col();
if (row < nFree && col < nFree)
Ap_avx[col] = Ap_avx[col] + Scalarf8((float)it.value()) * p_avx[row];
}
}
}
}
// Step 3: Dx = D * p (coord-packed AVX sparse matvec)
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < 3 * numParticles; i++)
f_avx[i].setZero();
#pragma omp for schedule(static)
for (int k = 0; k < D.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(D, k); it; ++it)
{
const int col = (int)it.col();
if (col < nFree)
f_avx[it.row()] = f_avx[it.row()] + Scalarf8((float)it.value()) * p_avx[col];
}
}
}
// Step 4: K*Dx → dPtL (scalar — 9×9 Hessian multiply)
const unsigned int numActiveParticles = m_model->numActiveParticles();
std::vector<Matrix3r, Eigen::aligned_allocator<Matrix3r>> dPtL(numActiveParticles, Matrix3r::Zero());
#pragma omp parallel for schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
float x0[8], x1[8], x2[8];
f_avx[3 * i].store(x0);
f_avx[3 * i + 1].store(x1);
f_avx[3 * i + 2].store(x2);
Eigen::Matrix<Real, 9, 1> Dxi;
Dxi << (Real)x0[0], (Real)x1[0], (Real)x2[0],
(Real)x0[1], (Real)x1[1], (Real)x2[1],
(Real)x0[2], (Real)x1[2], (Real)x2[2];
Eigen::Matrix<Real, 9, 1> KDx_vec = obj->m_hessian9x9[i] * Dxi;
Matrix3r dP;
dP.col(0) = KDx_vec.segment<3>(0);
dP.col(1) = KDx_vec.segment<3>(3);
dP.col(2) = KDx_vec.segment<3>(6);
dPtL[particleIndex] = dP.transpose() * m_L[particleIndex];
}
// Step 5: Force loop (AVX gather with precomputed V_gradW8), accumulate into Ap_avx
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const float V_i = (float)m_restVolumes[particleIndex];
const unsigned int numNeighbors = (unsigned int)m_initialNeighbors[i0].size();
const Matrix3f8 dPtLi_avx(dPtL[particleIndex].cast<float>());
Vector3f8 force_avx;
force_avx.setZero();
for (unsigned int j = 0; j < numNeighbors; j += 8)
{
const unsigned int count = std::min(numNeighbors - j, 8u);
unsigned int nIndices[8];
for (auto k = 0u; k < count; k++)
nIndices[k] = m_initial_to_current_index[m_initialNeighbors[i0][j + k]];
const Matrix3f8 dPtLj_avx = convertMat_zero(nIndices, &dPtL[0], count);
const Vector3f8& V_gradW = m_precomp_V_gradW8[m_precomputed_indices8[particleIndex] + j / 8];
force_avx += dPtLi_avx * V_gradW + dPtLj_avx * V_gradW;
}
const Scalarf8 fx(force_avx.x().reduce(), force_avx.y().reduce(), force_avx.z().reduce(), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
Ap_avx[i] = Ap_avx[i] - Scalarf8((float)dt2 * V_i) * fx;
}
}
#else
void Elasticity_Kee2023::newtonMatvec(ElasticObject* obj)
{
const std::vector<unsigned int>& group = obj->m_particleIndices;
const int numParticles = (int)group.size();
const int nFree = numParticles - (int)obj->m_nFixed;
auto& x = obj->m_pcg_p;
auto& Ax = obj->m_pcg_Ap;
const Real dt = obj->m_factorization->m_dt;
const Real dt2 = dt * dt;
auto& D = obj->m_factorization->m_D;
auto& HTH = obj->m_factorization->m_matHTH;
// Initialize Ax = M*x
#pragma omp parallel for schedule(static)
for (int j = 0; j < nFree; j++)
{
const unsigned int j0 = group[j];
const unsigned int pj = m_initial_to_current_index[j0];
const Real mj = m_model->getMass(pj);
Ax[j] = mj * x[j];
}
// Add H^T*K_ze*H*x (zero-energy mode control)
// Col-write pattern: Ax[col] += val * x[row] (HTH is symmetric, avoids data race)
if (m_alpha != static_cast<Real>(0.0))
{
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int k = 0; k < HTH.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::InnerIterator it(HTH, k); it; ++it)
{
const int row = (int)it.row();
const int col = (int)it.col();
if (row < nFree && col < nFree)
Ax[col] += it.value() * x[row];
}
}
}
}
// D * x
std::vector<Eigen::Matrix<Real, 9, 1>> Dx(numParticles);
#pragma omp parallel for schedule(static)
for (int i = 0; i < numParticles; i++)
{
Eigen::Matrix<Real, 9, 1> yi = Eigen::Matrix<Real, 9, 1>::Zero();
for (int a = 0; a < 3; a++)
{
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(D, 3 * i + a); it; ++it)
{
const int j = (int)it.col();
if (j < nFree)
{
for (int b = 0; b < 3; b++)
yi(3 * b + a) += it.value() * x[j][b];
}
}
}
Dx[i] = yi;
}
// K * Dx → dPtL
const unsigned int numActiveParticles = m_model->numActiveParticles();
std::vector<Matrix3r, Eigen::aligned_allocator<Matrix3r>> dPtL(numActiveParticles, Matrix3r::Zero());
#pragma omp parallel for schedule(static)
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
Eigen::Matrix<Real, 9, 1> KDx_vec = obj->m_hessian9x9[i] * Dx[i];
Matrix3r dP;
dP.col(0) = KDx_vec.segment<3>(0);
dP.col(1) = KDx_vec.segment<3>(3);
dP.col(2) = KDx_vec.segment<3>(6);
dPtL[particleIndex] = dP.transpose() * m_L[particleIndex];
}
// Force loop
Simulation* sim = Simulation::getCurrent();
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Vector3r xi0 = m_model->getPosition0(i0);
const Real V_i = m_restVolumes[particleIndex];
const Matrix3r& dPtL_i = dPtL[particleIndex];
const size_t numNeighbors = m_initialNeighbors[i0].size();
Vector3r force;
force.setZero();
for (unsigned int jn = 0; jn < numNeighbors; jn++)
{
const unsigned int j0 = m_initialNeighbors[i0][jn];
const unsigned int jCurrent = m_initial_to_current_index[j0];
const Matrix3r& dPtL_j = dPtL[jCurrent];
force += (dPtL_i + dPtL_j) * m_precomp_V_gradW[m_precomputed_indices[particleIndex] + jn];
}
Ax[i] -= dt2 * V_i * force;
}
}
#endif
#ifdef USE_AVX
int Elasticity_Kee2023::matFreePCG(ElasticObject* obj)
{
computeNewtonPreconditioner(obj);
const int nFree = (int)obj->m_dx_avx.size();
auto& gradient = obj->m_gradient_avx;
auto& dx = obj->m_dx_avx;
auto& r = obj->m_pcg_r_avx;
auto& p = obj->m_pcg_p_avx;
auto& Ap = obj->m_pcg_Ap_avx;
auto& z = obj->m_pcg_z_avx;
auto& precond = obj->m_pcg_precond;
// Initialize: x_0 = 0, r_0 = b = -gradient
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
{
dx[i].setZero();
r[i] = Scalarf8(0.0f) - gradient[i];
}
// z_0 = M^{-1} r_0 (apply block-diagonal preconditioner)
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
{
float rv[8]; r[i].store(rv);
Vector3r ri((Real)rv[0], (Real)rv[1], (Real)rv[2]);
Vector3r zi = precond[i] * ri;
z[i] = Scalarf8((float)zi[0], (float)zi[1], (float)zi[2], 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
}
// p_0 = z_0
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
p[i] = z[i];
// rz_old = r_0^T z_0 (dot product via Scalarf8: (r*z).reduce() sums x*x + y*y + z*z + 0+...)
Real rz_old = 0;
#pragma omp parallel for reduction(+:rz_old) schedule(static)
for (int i = 0; i < nFree; i++)
rz_old += (Real)(r[i] * z[i]).reduce();
// Compute initial residual norm for convergence check
Real r0_norm = 0;
#pragma omp parallel for reduction(+:r0_norm) schedule(static)
for (int i = 0; i < nFree; i++)
r0_norm += (Real)(r[i] * r[i]).reduce();
r0_norm = std::sqrt(r0_norm);
const Real tol = m_tolCG * r0_norm;
int iter = 0;
for (; iter < m_maxIterCG; iter++)
{
// Ap = A * p
newtonMatvec(obj);
// alpha = rz_old / (p^T Ap)
Real pAp = 0;
#pragma omp parallel for reduction(+:pAp) schedule(static)
for (int i = 0; i < nFree; i++)
pAp += (Real)(p[i] * Ap[i]).reduce();
if (pAp <= static_cast<Real>(0))
{
LOG_ERR << "PCG: pAp <= 0 at iter " << iter << ", pAp = " << pAp
<< " (matrix not positive definite)";
break;
}
const Scalarf8 alpha_avx((float)(rz_old / pAp));
// x_{k+1} = x_k + alpha * p_k
// r_{k+1} = r_k - alpha * Ap
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
{
dx[i] = dx[i] + alpha_avx * p[i];
r[i] = r[i] - alpha_avx * Ap[i];
}
// Check convergence
Real r_norm = 0;
#pragma omp parallel for reduction(+:r_norm) schedule(static)
for (int i = 0; i < nFree; i++)
r_norm += (Real)(r[i] * r[i]).reduce();
r_norm = std::sqrt(r_norm);
if (r_norm < tol)
{
iter++; // count this iteration
break;
}
// z_{k+1} = M^{-1} r_{k+1} (apply block-diagonal preconditioner)
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
{
float rv[8]; r[i].store(rv);
Vector3r ri((Real)rv[0], (Real)rv[1], (Real)rv[2]);
Vector3r zi = precond[i] * ri;
z[i] = Scalarf8((float)zi[0], (float)zi[1], (float)zi[2], 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
}
// rz_new = r_{k+1}^T z_{k+1}
Real rz_new = 0;
#pragma omp parallel for reduction(+:rz_new) schedule(static)
for (int i = 0; i < nFree; i++)
rz_new += (Real)(r[i] * z[i]).reduce();
// beta = rz_new / rz_old
const Scalarf8 beta_avx((float)(rz_new / rz_old));
// p_{k+1} = z_{k+1} + beta * p_k
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
p[i] = z[i] + beta_avx * p[i];
rz_old = rz_new;
}
return iter;
}
#else
int Elasticity_Kee2023::matFreePCG(ElasticObject* obj)
{
computeNewtonPreconditioner(obj);
const int nFree = (int)obj->m_dx.size();
auto& gradient = obj->m_gradient;
auto& dx = obj->m_dx;
auto& r = obj->m_pcg_r;
auto& p = obj->m_pcg_p;
auto& Ap = obj->m_pcg_Ap;
auto& z = obj->m_pcg_z;
auto& precond = obj->m_pcg_precond;
// Initialize: x_0 = 0, r_0 = b = -gradient
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
{
dx[i].setZero();
r[i] = -gradient[i];
}
// z_0 = M^{-1} r_0 (apply block-diagonal preconditioner)
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
z[i] = precond[i] * r[i];
// p_0 = z_0
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
p[i] = z[i];
// rz_old = r_0^T z_0
Real rz_old = 0;
#pragma omp parallel for reduction(+:rz_old) schedule(static)
for (int i = 0; i < nFree; i++)
rz_old += r[i].dot(z[i]);
// Compute initial residual norm for convergence check
Real r0_norm = 0;
#pragma omp parallel for reduction(+:r0_norm) schedule(static)
for (int i = 0; i < nFree; i++)
r0_norm += r[i].squaredNorm();
r0_norm = std::sqrt(r0_norm);
const Real tol = m_tolCG * r0_norm;
int iter = 0;
for (; iter < m_maxIterCG; iter++)
{
// Ap = A * p
newtonMatvec(obj);
// alpha = rz_old / (p^T Ap)
Real pAp = 0;
#pragma omp parallel for reduction(+:pAp) schedule(static)
for (int i = 0; i < nFree; i++)
pAp += p[i].dot(Ap[i]);
if (pAp <= static_cast<Real>(0))
{
LOG_ERR << "PCG: pAp <= 0 at iter " << iter << ", pAp = " << pAp
<< " (matrix not positive definite)";
break;
}
const Real alpha = rz_old / pAp;
// x_{k+1} = x_k + alpha * p_k
// r_{k+1} = r_k - alpha * Ap
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
{
dx[i] += alpha * p[i];
r[i] -= alpha * Ap[i];
}
// Check convergence
Real r_norm = 0;
#pragma omp parallel for reduction(+:r_norm) schedule(static)
for (int i = 0; i < nFree; i++)
r_norm += r[i].squaredNorm();
r_norm = std::sqrt(r_norm);
if (r_norm < tol)
{
iter++; // count this iteration
break;
}
// z_{k+1} = M^{-1} r_{k+1} (apply block-diagonal preconditioner)
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
z[i] = precond[i] * r[i];
// rz_new = r_{k+1}^T z_{k+1}
Real rz_new = 0;
#pragma omp parallel for reduction(+:rz_new) schedule(static)
for (int i = 0; i < nFree; i++)
rz_new += r[i].dot(z[i]);
// beta = rz_new / rz_old
const Real beta = rz_new / rz_old;
// p_{k+1} = z_{k+1} + beta * p_k
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
p[i] = z[i] + beta * p[i];
rz_old = rz_new;
}
return iter;
}
#endif
void Elasticity_Kee2023::prefactorizedLLTSolve(ElasticObject* obj)
{
#ifdef USE_AVX
auto& dx = obj->m_dx_avx;
#else
auto& dx = obj->m_dx;
#endif
const int n = (int)dx.size();
#ifdef USE_AVX
// dx is std::vector<Scalarf8> with 3 active lanes per element (x, y, z, 0, 0, 0, 0, 0).
// CholeskyAVXSolver::solve takes float* with stride, so we pack/unpack at this boundary.
VectorXr b(3 * n), x(3 * n);
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
{
float vals[8];
dx[i].store(vals);
b[3 * i] = vals[0];
b[3 * i + 1] = vals[1];
b[3 * i + 2] = vals[2];
}
// Solve 3 independent n-systems (one per coordinate, stride 3)
#pragma omp parallel for schedule(static)
for (int c = 0; c < 3; c++)
obj->m_factorization->m_cholesky->solve(&x[c], &b[c], 3);
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
dx[i] = Scalarf8(x[3 * i], x[3 * i + 1], x[3 * i + 2], 0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
#else
auto& dx_perm = obj->m_dx_perm;
auto& permInd = obj->m_factorization->m_permInd;
auto& permInvInd = obj->m_factorization->m_permInvInd;
auto& matL = obj->m_factorization->m_matL;
auto& matLT = obj->m_factorization->m_matLT;
// Permute dx (fill-in reduction ordering)
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < n; i++)
dx_perm[permInd[i]] = dx[i];
}
// Forward substitution: L * y = RHS_perm
for (int k = 0; k < matL.outerSize(); ++k)
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::InnerIterator it(matL, k); it; ++it)
if (it.row() == it.col())
dx_perm[it.row()] /= it.value();
else
dx_perm[it.row()] -= it.value() * dx_perm[it.col()];
// Backward substitution: L^T * dx = y
for (int k = (int) matLT.outerSize() - 1; k >= 0; --k)
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::ReverseInnerIterator it(matLT, k); it; --it)
if (it.row() == it.col())
dx_perm[it.row()] /= it.value();
else
dx_perm[it.row()] -= it.value() * dx_perm[it.col()];
// Inverse permutation
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < n; i++)
dx[permInvInd[i]] = dx_perm[i];
}
#endif
}
#ifdef USE_AVX
Real Elasticity_Kee2023::newtonSolve(ElasticObject* obj, int& cgIter)
{
// Energy + gradient in AVX (fills gradient_avx, m_F, m_rotations, m_PL)
Real energy = computeEnergyAndGradient(obj);
// Hessian (scalar, reads m_F, writes m_hessian9x9)
computeHessian(obj);
// CG in AVX (reads gradient_avx, writes dx_avx)
cgIter = matFreePCG(obj);
return energy;
}
#else
Real Elasticity_Kee2023::newtonSolve(ElasticObject* obj, int& cgIter)
{
Real energy = computeEnergyAndGradient(obj); // sets gradient, m_F, m_rotations
computeHessian(obj); // computes 9×9 per particle, assembles 3N system
cgIter = matFreePCG(obj); // solves 3N scalar system, writes solution into dx
// DEBUG: check descent direction and Hessian eigenvalues
{
const auto& gradient = obj->m_gradient;
const auto& dx = obj->m_dx; // dx
const int n = (int)dx.size();
// Gradient norm
Real gradNorm = 0;
for (int i = 0; i < n; i++)
gradNorm += gradient[i].squaredNorm();
gradNorm = std::sqrt(gradNorm);
// Descent: g · dx (should be negative for descent)
Real gDotDx = 0;
for (int i = 0; i < n; i++)
gDotDx += gradient[i].dot(dx[i]);
// Sample 9x9 Hessian eigenvalues for particle 0
if (!obj->m_hessian9x9.empty())
{
Eigen::SelfAdjointEigenSolver<Eigen::Matrix<Real, 9, 9>> eig(obj->m_hessian9x9[0]);
const auto& evals = eig.eigenvalues();
std::cout << "DEBUG Newton: E=" << energy << " |g|=" << gradNorm
<< " g·dx=" << gDotDx << " H[0] eigs=[" << evals.transpose() << "]" << std::endl;
}
else
{
std::cout << "DEBUG Newton: E=" << energy << " |g|=" << gradNorm
<< " g·dx=" << gDotDx << " (no Hessian)" << std::endl;
}
}
return energy;
}
#endif
#ifdef USE_AVX
Real Elasticity_Kee2023::lbfgsSolve(ElasticObject* obj)
{
auto& dx = obj->m_dx_avx; // Scalarf8
const int windowSize = m_lbfgsWindowSize;
const int n = (int)dx.size();
int& count = obj->m_lbfgs_count;
auto& lbfgs_s = obj->m_lbfgs_s_avx; // Scalarf8 secant history
auto& lbfgs_y = obj->m_lbfgs_y_avx; // Scalarf8 secant history
auto& lbfgs_rho = obj->m_lbfgs_rho; // Real
auto& lbfgs_alpha = obj->m_lbfgs_alpha; // Real
auto& last_sol = obj->m_lbfgs_last_sol_avx; // Scalarf8
auto& gradient = obj->m_gradient_avx; // Scalarf8
auto& q = obj->m_lbfgs_q_avx; // Scalarf8
// Save g_{k-1} into q before computeEnergyAndGradient overwrites gradient
if (count > 0)
{
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
q[i] = gradient[i];
}
Real energy = computeEnergyAndGradient(obj); // gradient = g_k
// Compute secant pairs from actual position/gradient differences
if (count > 0 && windowSize > 0)
{
int slot = (count - 1) % windowSize;
// s_{k-1} = x_k - x_{k-1}
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
lbfgs_s[slot][i] = obj->m_xk_avx[i] - last_sol[i];
// y_{k-1} = g_k - g_{k-1}
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
lbfgs_y[slot][i] = gradient[i] - q[i];
Real ys = 0;
#pragma omp parallel reduction(+:ys)
{
Scalarf8 acc; acc.setZero();
#pragma omp for schedule(static) nowait
for (int i = 0; i < n; i++)
acc += lbfgs_y[slot][i] * lbfgs_s[slot][i];
ys += (Real)acc.reduce();
}
lbfgs_rho[slot] = (std::abs(ys) > static_cast<Real>(1e-10))
? static_cast<Real>(1.0) / ys : 0;
}
// Save current xk for next iteration, set q = g_k for two-loop
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
{
q[i] = gradient[i];
last_sol[i] = obj->m_xk_avx[i];
}
int nPairs = std::min(count, windowSize);
// Backward loop (newest -> oldest secant pair)
for (int j = 0; j < nPairs; j++)
{
int idx = ((count - 1 - j) % windowSize + windowSize) % windowSize;
Real sq = 0;
#pragma omp parallel reduction(+:sq)
{
Scalarf8 acc; acc.setZero();
#pragma omp for schedule(static) nowait
for (int i = 0; i < n; i++)
acc += lbfgs_s[idx][i] * q[i];
sq += (Real)acc.reduce();
}
lbfgs_alpha[j] = lbfgs_rho[idx] * sq;
const Scalarf8 alpha_avx((float)lbfgs_alpha[j]);
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
q[i] = q[i] - alpha_avx * lbfgs_y[idx][i];
}
// Apply initial inverse Hessian: dx = H_0^-1 * q
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
dx[i] = q[i];
prefactorizedLLTSolve(obj);
// Forward loop (oldest -> newest secant pair)
for (int j = nPairs - 1; j >= 0; j--)
{
int idx = ((count - 1 - j) % windowSize + windowSize) % windowSize;
Real yr = 0;
#pragma omp parallel reduction(+:yr)
{
Scalarf8 acc; acc.setZero();
#pragma omp for schedule(static) nowait
for (int i = 0; i < n; i++)
acc += lbfgs_y[idx][i] * dx[i];
yr += (Real)acc.reduce();
}
Real beta = lbfgs_rho[idx] * yr;
const Scalarf8 ab_avx((float)(lbfgs_alpha[j] - beta));
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
dx[i] = dx[i] + ab_avx * lbfgs_s[idx][i];
}
// Search direction: dx = -H*g
const Scalarf8 negOne(-1.0f);
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
dx[i] = negOne * dx[i];
count++;
return energy;
}
#else
Real Elasticity_Kee2023::lbfgsSolve(ElasticObject* obj)
{
auto& dx = obj->m_dx;
const int windowSize = m_lbfgsWindowSize;
const int n = (int)dx.size();
int& count = obj->m_lbfgs_count;
auto& lbfgs_s = obj->m_lbfgs_s;
auto& lbfgs_y = obj->m_lbfgs_y;
auto& lbfgs_rho = obj->m_lbfgs_rho;
auto& lbfgs_alpha = obj->m_lbfgs_alpha;
auto& last_sol = obj->m_lbfgs_last_sol;
auto& gradient = obj->m_gradient;
auto& q = obj->m_lbfgs_q;
// Save g_{k-1} into q before computeEnergyAndGradient overwrites gradient
if (count > 0)
{
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
q[i] = gradient[i];
}
// Compute energy and gradient at current xk
Real energy = computeEnergyAndGradient(obj); // gradient = g_k
// Compute secant pairs from actual position/gradient differences
if (count > 0 && windowSize > 0)
{
int slot = (count - 1) % windowSize;
// s_{k-1} = x_k - x_{k-1} (actual step taken, includes line search alpha)
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
lbfgs_s[slot][i] = obj->m_xk[i] - last_sol[i];
// y_{k-1} = g_k - g_{k-1} (q holds g_{k-1})
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
lbfgs_y[slot][i] = gradient[i] - q[i];
Real ys = 0;
#pragma omp parallel for reduction(+:ys) schedule(static)
for (int i = 0; i < n; i++)
ys += lbfgs_y[slot][i].dot(lbfgs_s[slot][i]);
lbfgs_rho[slot] = (std::abs(ys) > static_cast<Real>(1e-10))
? static_cast<Real>(1.0) / ys : 0;
}
// Save current xk for next iteration, set q = g_k for two-loop
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
{
q[i] = gradient[i];
last_sol[i] = obj->m_xk[i];
}
// Number of usable secant pairs
int nPairs = std::min(count, windowSize);
// L-BFGS two-loop recursion
// q contains g_k (gradient)
// Backward loop (newest to oldest secant pair)
for (int j = 0; j < nPairs; j++)
{
int idx = ((count - 1 - j) % windowSize + windowSize) % windowSize;
Real sq = 0;
#pragma omp parallel for reduction(+:sq) schedule(static)
for (int i = 0; i < n; i++)
sq += lbfgs_s[idx][i].dot(q[i]);
lbfgs_alpha[j] = lbfgs_rho[idx] * sq;
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
q[i] -= lbfgs_alpha[j] * lbfgs_y[idx][i];
}
// Apply initial inverse Hessian: r = A^{-1} * q
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
dx[i] = q[i];
prefactorizedLLTSolve(obj);
// dx now contains r = H_0^{-1} * q
// Forward loop (oldest to newest secant pair)
for (int j = nPairs - 1; j >= 0; j--)
{
int idx = ((count - 1 - j) % windowSize + windowSize) % windowSize;
Real yr = 0;
#pragma omp parallel for reduction(+:yr) schedule(static)
for (int i = 0; i < n; i++)
yr += lbfgs_y[idx][i].dot(dx[i]);
Real beta = lbfgs_rho[idx] * yr;
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
dx[i] += (lbfgs_alpha[j] - beta) * lbfgs_s[idx][i];
}
// Search direction: dx = -H*g
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
dx[i] = -dx[i];
count++;
return energy;
}
#endif
#ifndef USE_AVX
Real Elasticity_Kee2023::lineSearch(ElasticObject* obj, Real energy, int& lsIter)
{
lsIter = 0;
auto& x = obj->m_xk;
auto& dx = obj->m_dx;
auto& gradient = obj->m_gradient;
const int n = (int)dx.size();
// Directional derivative: g^T * dx (must be < 0 for descent)
Real dirDeriv = 0;
#pragma omp parallel for reduction(+:dirDeriv) schedule(static)
for (int i = 0; i < n; i++)
dirDeriv += gradient[i].dot(dx[i]);
if (dirDeriv >= 0)
LOG_WARN << "LS: dirDeriv=" << dirDeriv << " (not a descent direction)";
// Backup current xk
std::vector<Vector3r, Eigen::aligned_allocator<Vector3r>> xk_backup(n);
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
xk_backup[i] = x[i];
Real alpha = static_cast<Real>(1.0);
const Real eps = static_cast<Real>(1e-4);
for (int ls = 0; ls < m_maxLSIter; ls++)
{
// Set trial point: xk = xk_backup + alpha * dx
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
x[i] = xk_backup[i] + alpha * dx[i];
Real trialEnergy = computeEnergy(obj);
lsIter++;
// Armijo: f(x + alpha*dx) <= f(x) + c1*alpha*(g^T*dx)
if (trialEnergy <= energy + m_lsArmijoParam * alpha * dirDeriv)
{
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
x[i] = xk_backup[i];
return alpha;
}
// Energy difference negligible — accept alpha
if (std::abs(trialEnergy - energy) < eps)
{
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
x[i] = xk_backup[i];
return alpha;
}
alpha *= m_lsBeta;
}
// LS exhausted
LOG_WARN << "LS exhausted: dirDeriv=" << dirDeriv << " E0=" << energy;
// Restore original position
#pragma omp parallel for schedule(static)
for (int i = 0; i < n; i++)
x[i] = xk_backup[i];
return static_cast<Real>(0);
}
#endif
#ifdef USE_AVX
void Elasticity_Kee2023::stepElasticitySolver()
{
START_TIMING("Elasticity_Kee2023")
const unsigned int numActiveParticles = m_model->numActiveParticles();
if (numActiveParticles == 0)
return;
precomputeValues();
// AVX build supports both Newton (solverType=0) and L-BFGS (solverType=1).
size_t numObjects = m_objects.size();
for (auto objIndex = 0; objIndex < numObjects; objIndex++)
{
START_TIMING("objSolve")
ElasticObject* obj = m_objects[objIndex];
auto& xk = obj->m_xk_avx; // Scalarf8
auto& dx = obj->m_dx_avx; // Scalarf8
const std::vector<unsigned int>& group = obj->m_particleIndices;
int numParticles = (int)group.size();
const Real fdt = obj->m_factorization->m_dt;
// xTilde = x + dt*v (uses externally-set velocity, e.g. from AnimationField)
computeXTilde(obj);
// Fixed particles: DFSPH integrator skips them, so write xTilde back to model position
// so they advance by the externally-prescribed velocity.
const int nFixed = (int)obj->m_nFixed;
const int firstFixed = numParticles - nFixed;
for (int i = firstFixed; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
float v[8];
obj->m_xTilde_avx[i].store(v);
m_model->getPosition(particleIndex) = Vector3r((Real)v[0], (Real)v[1], (Real)v[2]);
}
// xk = xTilde (initial iterate)
#pragma omp parallel for schedule(static)
for (int i = 0; i < numParticles; i++)
xk[i] = obj->m_xTilde_avx[i];
const int maxIter = m_maxIter;
const int nFree = (int)dx.size();
obj->m_lbfgs_count = 0;
int totalCGIter = 0;
int iter = 0;
for (; iter < maxIter; iter++)
{
// Compute search direction (stored in dx_avx)
Real energy;
if (m_solverType == 0) // Newton
{
int cgIter;
energy = newtonSolve(obj, cgIter);
totalCGIter += cgIter;
}
else // L-BFGS
{
energy = lbfgsSolve(obj);
}
// Convergence check on ||dx||_inf (reduce from Scalarf8: max of lanes 0..2)
Real dxNorm = 0;
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
{
float v[8];
dx[i].store(v);
const Real localMax = std::max(std::max(std::abs((Real)v[0]), std::abs((Real)v[1])), std::abs((Real)v[2]));
#pragma omp critical
if (localMax > dxNorm) dxNorm = localMax;
}
if (dxNorm < m_maxError)
break;
// xk += dx (step size fixed at 1 — no line search in AVX path)
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
xk[i] += dx[i];
}
if (iter == maxIter)
LOG_WARN << "Elasticity_Kee2023: not converged after " << maxIter << " iterations";
updateVelocity(obj, fdt);
double elapsedMs = STOP_TIMING
LOG_INFO << "STATS frame obj=" << objIndex << " solverIter=" << iter + 1 << " lsIter=0 elapsed_ms=" << elapsedMs;
}
STOP_TIMING_AVG
}
#else
void Elasticity_Kee2023::stepElasticitySolver()
{
START_TIMING("Elasticity_Kee2023")
const unsigned int numActiveParticles = m_model->numActiveParticles();
if (numActiveParticles == 0)
return;
precomputeValues();
size_t numObjects = m_objects.size();
for (auto objIndex = 0; objIndex < numObjects; objIndex++)
{
START_TIMING("objSolve")
ElasticObject* obj = m_objects[objIndex];
auto& xk = obj->m_xk;
auto& dx = obj->m_dx;
const std::vector<unsigned int>& group = obj->m_particleIndices;
int numParticles = (int)group.size();
const Real fdt = obj->m_factorization->m_dt;
// Initialize: compute x_tilde = x + dt * v (uses externally-set velocity, e.g. from AnimationField)
computeXTilde(obj);
// Fixed particles: DFSPH integrator skips them, so write xTilde back to model position
// so they advance by the externally-prescribed velocity.
const int nFixed = (int)obj->m_nFixed;
const int firstFixed = numParticles - nFixed;
for (int i = firstFixed; i < numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
m_model->getPosition(particleIndex) = obj->m_xTilde[i];
}
// x0 = x_tilde
#pragma omp parallel for schedule(static)
for (int i = 0; i < numParticles; i++)
xk[i] = obj->m_xTilde[i];
// Iterative solve
const int maxIter = m_maxIter;
const int nFree = (int)dx.size();
// Renew L-BFGS buffers if window size changed at runtime
obj->m_lbfgs_count = 0;
if ((int)obj->m_lbfgs_s.size() != m_lbfgsWindowSize)
{
obj->m_lbfgs_s.resize(m_lbfgsWindowSize);
obj->m_lbfgs_y.resize(m_lbfgsWindowSize);
for (int w = 0; w < m_lbfgsWindowSize; w++)
{
obj->m_lbfgs_s[w].resize(nFree, Vector3r::Zero());
obj->m_lbfgs_y[w].resize(nFree, Vector3r::Zero());
}
obj->m_lbfgs_rho.resize(m_lbfgsWindowSize, 0);
obj->m_lbfgs_alpha.resize(m_lbfgsWindowSize, 0);
}
int totalLSIter = 0;
int totalCGIter = 0;
int iter = 0;
for (; iter < maxIter; iter++)
{
// Compute search direction (stored in dx) and gradient (stored in m_gradient)
Real energy;
if (m_solverType == 0) // Newton
{
int cgIter;
energy = newtonSolve(obj, cgIter);
totalCGIter += cgIter;
}
else // L-BFGS
{
energy = lbfgsSolve(obj);
}
// Convergence check on ||dx||_inf before line search
Real dxNorm = 0;
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
{
const Real localMax = dx[i].cwiseAbs().maxCoeff();
#pragma omp critical
if (localMax > dxNorm) dxNorm = localMax;
}
if (dxNorm < m_maxError)
break;
// Line search
Real alpha = static_cast<Real>(1.0);
if (m_useLineSearch)
{
int lsIter;
alpha = lineSearch(obj, energy, lsIter);
totalLSIter += lsIter;
if (alpha == static_cast<Real>(0))
{
LOG_INFO << "Elasticity_Kee2023: line search failed at iter " << iter;
break;
}
}
// xk += alpha * dx
#pragma omp parallel for schedule(static)
for (int i = 0; i < nFree; i++)
xk[i] += alpha * dx[i];
}
if (iter == maxIter)
LOG_WARN << "Elasticity_Kee2023: not converged after " << maxIter << " iterations";
updateVelocity(obj, fdt);
double elapsedMs = STOP_TIMING
if (m_solverType == 0) // Newton
LOG_INFO << "STATS frame obj=" << objIndex << " solverIter=" << iter + 1 << " cgIter=" << totalCGIter << " avgCG=" << (iter > 0 ? totalCGIter / (iter + 1) : 0) << " lsIter=" << totalLSIter << " elapsed_ms=" << elapsedMs;
else // L-BFGS
LOG_INFO << "STATS frame obj=" << objIndex << " solverIter=" << iter + 1 << " lsIter=" << totalLSIter << " elapsed_ms=" << elapsedMs;
}
STOP_TIMING_AVG
}
#endif
Real Elasticity_Kee2023::computePsi(const Matrix3r& F, const Matrix3r& R) const
{
const Real J = F.determinant();
if (m_materialType == ENUM_MATERIAL_STABLE_NEOHOOKEAN)
{
// Smith et al. 2018 Eq. 14
const Real I_C = (F.transpose() * F).trace();
const Real alpha = 1 + m_mu / m_lambda - m_mu / (4 * m_lambda);
const Real Jma = J - alpha;
return static_cast<Real>(0.5) * m_mu * (I_C - static_cast<Real>(3.0))
+ static_cast<Real>(0.5) * m_lambda * Jma * Jma
- static_cast<Real>(0.5) * m_mu * std::log(I_C + 1);
}
// Co-rotated: Psi = μ||F - R||² + (λ/2)(tr(R^T F) - 3)²
const Real trRtF = (R.transpose() * F).trace();
return m_mu * (F - R).squaredNorm() + static_cast<Real>(0.5) * m_lambda * (trRtF - static_cast<Real>(3.0)) * (trRtF - static_cast<Real>(3.0));
}
Matrix3r Elasticity_Kee2023::computeP(const Matrix3r& F, const Matrix3r& R) const
{
const Real J = F.determinant();
Matrix3r cofF;
cofF.col(0) = F.col(1).cross(F.col(2));
cofF.col(1) = F.col(2).cross(F.col(0));
cofF.col(2) = F.col(0).cross(F.col(1));
if (m_materialType == ENUM_MATERIAL_STABLE_NEOHOOKEAN)
{
// Smith et al. 2018 Eq. 18
const Real I_C = (F.transpose() * F).trace();
const Real alpha = 1 + m_mu / m_lambda - m_mu / (4 * m_lambda);
return m_mu * (1 - 1 / (I_C + 1)) * F + m_lambda * (J - alpha) * cofF;
}
// Co-rotated: P = 2μ(F - R) + λ(tr(R^T F) - 3) R
const Real trRtF = (R.transpose() * F).trace();
return static_cast<Real>(2.0) * m_mu * (F - R) + m_lambda * (trRtF - static_cast<Real>(3.0)) * R;
}