Program Listing for File Elasticity_Kugelstadt2021.cpp
↰ Return to documentation for file (SPlisHSPlasH/Elasticity/Elasticity_Kugelstadt2021.cpp)
#include "Elasticity_Kugelstadt2021.h"
#include "SPlisHSPlasH/Simulation.h"
#include "SPlisHSPlasH/Utilities/MathFunctions.h"
#include "SPlisHSPlasH/TimeManager.h"
#include "Utilities/Timing.h"
#include "Utilities/Counting.h"
#include <extern/md5/md5.h>
#include "Utilities/BinaryFileReaderWriter.h"
#include "Utilities/StringTools.h"
#include <Utilities/FileSystem.h>
#include <array>
using namespace SPH;
using namespace GenParam;
std::string Elasticity_Kugelstadt2021::METHOD_NAME = "Kugelstadt et al. 2021";
int Elasticity_Kugelstadt2021::YOUNGS_MODULUS = -1;
int Elasticity_Kugelstadt2021::POISSON_RATIO = -1;
int Elasticity_Kugelstadt2021::FIXED_BOX_MIN = -1;
int Elasticity_Kugelstadt2021::FIXED_BOX_MAX = -1;
int Elasticity_Kugelstadt2021::ITERATIONS_V = -1;
int Elasticity_Kugelstadt2021::MAX_ITERATIONS_V = -1;
int Elasticity_Kugelstadt2021::MAX_ERROR_V = -1;
int Elasticity_Kugelstadt2021::ALPHA = -1;
int Elasticity_Kugelstadt2021::MAX_NEIGHBORS = -1;
Elasticity_Kugelstadt2021::Elasticity_Kugelstadt2021(FluidModel *model) :
NonPressureForceBase(model)
{
const unsigned int numParticles = model->numActiveParticles();
m_restVolumes.resize(numParticles);
m_current_to_initial_index.resize(numParticles);
m_initial_to_current_index.resize(numParticles);
m_initialNeighbors.resize(numParticles);
m_rotations.resize(numParticles, Matrix3r::Identity());
m_stress.resize(numParticles);
m_L.resize(numParticles); // kernel gradient correction matrix L
m_F.resize(numParticles); // deformation gradient
m_RL.resize(numParticles); // stores the rotation matrix times the matrix L
m_vDiff.resize(numParticles, Vector3r::Zero()); // velocity difference used for the warm start of the volume cg solver
m_youngsModulus = static_cast<Real>(100000.0);
m_poissonRatio = static_cast<Real>(0.3);
m_iterationsV = 0;
m_maxIterV = 100;
m_maxErrorV = static_cast<Real>(1.0e-4);
m_alpha = 0.0;
m_youngsModulus = 5000000;
m_maxNeighbors = -1;
m_fixedBoxMin.setZero();
m_fixedBoxMax.setZero();
model->addField({ "rest volume", METHOD_NAME, FieldType::Scalar, [&](const unsigned int i) -> Real* { return &m_restVolumes[i]; }, true });
model->addField({ "rotation", METHOD_NAME, FieldType::Matrix3, [&](const unsigned int i) -> Real* { return &m_rotations[i](0,0); } });
model->addField({ "stress", METHOD_NAME, FieldType::Scalar, [&](const unsigned int i) -> Real* { return &m_stress[i]; } });
model->addField({ "deformation gradient", METHOD_NAME, FieldType::Matrix3, [&](const unsigned int i) -> Real* { return &m_F[i](0,0); } });
model->addField({ "correction matrix", METHOD_NAME, FieldType::Matrix3, [&](const unsigned int i) -> Real* { return &m_L[i](0,0); } });
}
Elasticity_Kugelstadt2021::~Elasticity_Kugelstadt2021(void)
{
m_model->removeFieldByName("rest volume");
m_model->removeFieldByName("rotation");
m_model->removeFieldByName("stress");
m_model->removeFieldByName("deformation gradient");
m_model->removeFieldByName("correction matrix");
for (auto objIndex = 0; objIndex < m_objects.size(); objIndex++)
{
delete m_objects[objIndex];
}
m_objects.clear();
}
void Elasticity_Kugelstadt2021::deferredInit()
{
initValues();
}
void Elasticity_Kugelstadt2021::initParameters()
{
NonPressureForceBase::initParameters();
ParameterBase::GetFunc<Real> getFctYM = [&]()-> Real { return m_youngsModulus; };
ParameterBase::SetFunc<Real> setFctYM = [&](Real val)
{
m_youngsModulus = val;
m_mu = m_youngsModulus / (static_cast<Real>(2.0) * (static_cast<Real>(1.0) + m_poissonRatio));
m_lambda = m_youngsModulus * m_poissonRatio / ((static_cast<Real>(1.0) + m_poissonRatio) * (static_cast<Real>(1.0) - static_cast<Real>(2.0) * m_poissonRatio));
if (Simulation::getCurrent()->isSimulationInitialized()) // if Young's modulus has changed, recompute the factorization
Simulation::getCurrent()->reset();
};
YOUNGS_MODULUS = createNumericParameter("youngsModulus", "Young`s modulus", getFctYM, setFctYM);
setGroup(YOUNGS_MODULUS, "Fluid Model|Elasticity");
setDescription(YOUNGS_MODULUS, "Stiffness of the elastic material");
RealParameter* rparam = static_cast<RealParameter*>(getParameter(YOUNGS_MODULUS));
rparam->setMinValue(0.0);
ParameterBase::GetFunc<Real> getFctPR = [&]()-> Real { return m_poissonRatio; };
ParameterBase::SetFunc<Real> setFctPR = [&](Real val)
{
m_poissonRatio = val;
m_mu = m_youngsModulus / (static_cast<Real>(2.0) * (static_cast<Real>(1.0) + m_poissonRatio));
m_lambda = m_youngsModulus * m_poissonRatio / ((static_cast<Real>(1.0) + m_poissonRatio) * (static_cast<Real>(1.0) - static_cast<Real>(2.0) * m_poissonRatio));
if (Simulation::getCurrent()->isSimulationInitialized()) // if Poisson ration has changed, recompute the factorization
Simulation::getCurrent()->reset();
};
POISSON_RATIO = createNumericParameter("poissonsRatio", "Poisson`s ratio", getFctPR, setFctPR);
setGroup(POISSON_RATIO, "Fluid Model|Elasticity");
setDescription(POISSON_RATIO, "Ratio of transversal expansion and axial compression");
rparam = static_cast<RealParameter*>(getParameter(POISSON_RATIO));
rparam->setMinValue(static_cast<Real>(-1.0 + 1e-4));
rparam->setMaxValue(static_cast<Real>(0.5 - 1e-4));
ITERATIONS_V = createNumericParameter("volumeIterations", "Iterations", &m_iterationsV);
setGroup(ITERATIONS_V, "Fluid Model|Elasticity");
setDescription(ITERATIONS_V, "Iterations required by the volume solver.");
getParameter(ITERATIONS_V)->setReadOnly(true);
MAX_ITERATIONS_V = createNumericParameter("volumeMaxIter", "Max. iterations (volume solver)", &m_maxIterV);
setGroup(MAX_ITERATIONS_V, "Fluid Model|Elasticity");
setDescription(MAX_ITERATIONS_V, "Max. iterations of the volume solver");
static_cast<NumericParameter<unsigned int>*>(getParameter(MAX_ITERATIONS_V))->setMinValue(0);
MAX_ERROR_V = createNumericParameter("volumeMaxError", "Max. volume error", &m_maxErrorV);
setGroup(MAX_ERROR_V, "Fluid Model|Elasticity");
setDescription(MAX_ERROR_V, "Max. error of the volume solver");
rparam = static_cast<RealParameter*>(getParameter(MAX_ERROR_V));
rparam->setMinValue(1e-9);
ParameterBase::GetFunc<Real> getFctAlpha = [&]()-> Real { return m_alpha; };
ParameterBase::SetFunc<Real> setFctAlpha = [&](Real val)
{
m_alpha = val;
if (Simulation::getCurrent()->isSimulationInitialized()) // if value has changed, recompute the factorization
Simulation::getCurrent()->reset();
};
ALPHA = createNumericParameter("alpha", "Zero-energy modes suppression", getFctAlpha, setFctAlpha);
setGroup(ALPHA, "Fluid Model|Elasticity");
setDescription(ALPHA, "Coefficent for zero-energy modes suppression method");
rparam = static_cast<RealParameter*>(getParameter(ALPHA));
rparam->setMinValue(0.0);
ParameterBase::GetFunc<int> getFct5 = [&]()-> int { return m_maxNeighbors; };
ParameterBase::SetFunc<int> setFct5 = [&](int val)
{
m_maxNeighbors = val;
if (Simulation::getCurrent()->isSimulationInitialized()) // if value has changed, recompute the factorization
Simulation::getCurrent()->reset();
};
MAX_NEIGHBORS = createNumericParameter("maxNeighbors", "Max. neighbors", getFct5, setFct5);
setGroup(MAX_NEIGHBORS, "Fluid Model|Elasticity");
setDescription(MAX_NEIGHBORS, "Maximum number of neighbors that are considered.");
ParameterBase::GetVecFunc<Real> getFctFMin = [&]()-> Real* { return m_fixedBoxMin.data(); };
ParameterBase::SetVecFunc<Real> setFctFMin = [&](Real* val)
{
m_fixedBoxMin = Vector3r(val[0], val[1], val[2]);
determineFixedParticles();
};
FIXED_BOX_MIN = createVectorParameter("fixedBoxMin", "Fixed box min", 3u, getFctFMin, setFctFMin);
setGroup(FIXED_BOX_MIN, "Fluid Model|Elasticity");
setDescription(FIXED_BOX_MIN, "Minimum point of box of which contains the fixed particles.");
getParameter(FIXED_BOX_MIN)->setReadOnly(true);
ParameterBase::GetVecFunc<Real> getFctFMax = [&]()-> Real* { return m_fixedBoxMax.data(); };
ParameterBase::SetVecFunc<Real> setFctFMax = [&](Real* val)
{
m_fixedBoxMax = Vector3r(val[0], val[1], val[2]);
determineFixedParticles();
};
FIXED_BOX_MAX = createVectorParameter("fixedBoxMax", "Fixed box max", 3u, getFctFMax, setFctFMax);
setGroup(FIXED_BOX_MAX, "Fluid Model|Elasticity");
setDescription(FIXED_BOX_MAX, "Maximum point of box of which contains the fixed particles.");
getParameter(FIXED_BOX_MAX)->setReadOnly(true);
}
void Elasticity_Kugelstadt2021::determineFixedParticles()
{
const unsigned int numParticles = m_model->numActiveParticles();
if (!m_fixedBoxMin.isZero() || !m_fixedBoxMax.isZero())
{
for (int i = 0; i < (int)numParticles; i++)
{
const Vector3r& x = m_model->getPosition0(i);
if ((x[0] > m_fixedBoxMin[0]) && (x[1] > m_fixedBoxMin[1]) && (x[2] > m_fixedBoxMin[2]) &&
(x[0] < m_fixedBoxMax[0]) && (x[1] < m_fixedBoxMax[1]) && (x[2] < m_fixedBoxMax[2]))
{
m_model->setParticleState(i, ParticleState::Fixed);
}
}
}
}
std::string Elasticity_Kugelstadt2021::computeMD5(const unsigned int objIndex)
{
ElasticObject* obj = m_objects[objIndex];
auto& group = obj->m_particleIndices;
auto numParticles = group.size();
std::vector<unsigned int> tempN;
tempN.resize(obj->m_particleIndices.size() * 2);
for (size_t i = 0; i < numParticles; i++)
{
const unsigned int particleIndex = group[i];
const size_t numNeighbors = m_initialNeighbors[particleIndex].size();
tempN[2 * i] = static_cast<unsigned int>(numNeighbors);
tempN[2 * i + 1] = 0;
for (auto j = 0; j < numNeighbors; j++)
tempN[2 * i + 1] += m_initialNeighbors[particleIndex][j] - group[0];
}
// compute MD5 checksum for all particle positions, this is used for the cache file
MD5 context((unsigned char*)&tempN[0], static_cast<unsigned int>(2 * numParticles * sizeof(unsigned int)));
char* md5hex = context.hex_digest();
tempN.clear();
return std::string(md5hex);
}
void Elasticity_Kugelstadt2021::initValues()
{
Simulation *sim = Simulation::getCurrent();
sim->getNeighborhoodSearch()->find_neighbors();
FluidModel *model = m_model;
const unsigned int numParticles = model->numActiveParticles();
const unsigned int fluidModelIndex = model->getPointSetIndex();
m_totalNeighbors = 0u;
// Store the neighbors in the reference configurations and
// compute the volume of each particle in rest state
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
m_current_to_initial_index[i] = i;
m_initial_to_current_index[i] = i;
// reset particle state
if (m_model->getParticleState(i) == ParticleState::Fixed)
m_model->setParticleState(i, ParticleState::Active);
// only neighbors in same phase will influence elasticity
unsigned int numNeighbors = sim->numberOfNeighbors(fluidModelIndex, fluidModelIndex, i);
m_initialNeighbors[i].resize(numNeighbors);
for (unsigned int j = 0; j < numNeighbors; j++)
m_initialNeighbors[i][j] = sim->getNeighbor(fluidModelIndex, fluidModelIndex, i, j);
// if maxNeighbors is set, then sort all neighbors wrt. to their distance to xi
// and only take the maxNeighbors next ones.
if (m_maxNeighbors > 0)
{
struct Comparator {
Comparator(const Vector3r& xi, Vector3r* x) : m_xi(xi), m_x(x) {};
bool operator()(unsigned int a, unsigned int b)
{
return (m_x[a] - m_xi).squaredNorm() < (m_x[b] - m_xi).squaredNorm();
}
Vector3r m_xi;
Vector3r* m_x;
};
// sort the neighbors according to their distance
std::sort(m_initialNeighbors[i].begin(), m_initialNeighbors[i].end(), Comparator(model->getPosition0(i), &model->getPosition0(0)));
// take only the next maxNeighbors
if (m_initialNeighbors[i].size() > m_maxNeighbors)
{
numNeighbors = m_maxNeighbors;
m_initialNeighbors[i].resize(m_maxNeighbors);
}
}
m_totalNeighbors += numNeighbors;
// compute rest volume
Real density = model->getMass(i) * sim->W_zero();
const Vector3r &xi0 = model->getPosition0(i);
for (size_t j = 0; j < m_initialNeighbors[i].size(); j++)
{
const unsigned int neighborIndex0 = m_initialNeighbors[i][j];
const Vector3r& xj0 = model->getPosition0(neighborIndex0);
density += model->getMass(neighborIndex0) * sim->W(xi0 - xj0);
}
m_restVolumes[i] = model->getMass(i) / density;
m_rotations[i].setIdentity();
m_stress[i] = 0.0;
m_F[i].setIdentity();
m_vDiff[i].setZero();
m_RL[i].setIdentity();
}
}
// mark all particles in the bounding box as fixed
determineFixedParticles();
// find separate objects
START_TIMING("findObjects")
findObjects();
STOP_TIMING_AVG;
// if we find the same object, copy the neighborhood info
size_t numObjects = m_objects.size();
auto& fluidInfos = sim->getFluidInfos();
for (auto objIndex = 1; objIndex < numObjects; objIndex++)
{
bool foundSameObj = false;
int objIndex2;
for (objIndex2 = 0; objIndex2 < objIndex; objIndex2++)
{
if (fluidInfos[objIndex].hasSameParticleSampling(fluidInfos[objIndex2]))
{
foundSameObj = true;
break;
}
}
if (foundSameObj)
{
ElasticObject* obj = m_objects[objIndex];
ElasticObject* obj0 = m_objects[objIndex2];
const std::vector<unsigned int>& group = obj->m_particleIndices;
const std::vector<unsigned int>& group0 = obj0->m_particleIndices;
int numParticles = (int)group.size();
int offset = group[0];
int offset0 = group0[0];
for (int i = 0; i < (int)numParticles; i++)
{
int particleIndex = group[i];
int particleIndex0 = group0[i];
const unsigned int i0 = m_current_to_initial_index[particleIndex];
const unsigned int i00 = m_current_to_initial_index[particleIndex0];
const size_t numNeighbors = m_initialNeighbors[i00].size();
m_initialNeighbors[i0].resize(numNeighbors);
for (int j = 0; j < numNeighbors; j++)
{
m_initialNeighbors[i0][j] = m_initialNeighbors[i00][j] - offset0 + offset;
}
}
}
}
// compute kernel gradient correction matrix
START_TIMING("computeMatrixL")
computeMatrixL();
STOP_TIMING_AVG;
// init factorization
START_TIMING("initSystem")
initSystem();
STOP_TIMING_AVG;
}
void Elasticity_Kugelstadt2021::findObjects()
{
Simulation *sim = Simulation::getCurrent();
const unsigned int numParticles = m_model->numActiveParticles();
const unsigned int fluidModelIndex = m_model->getPointSetIndex();
FluidModel *model = m_model;
m_objects.clear();
std::map<unsigned int, unsigned int> obj2group;
for (unsigned int i = 0; i < numParticles; i++)
{
const unsigned int objId = m_model->getObjectId(i);
// search for object id in map
if (obj2group.find(objId) == obj2group.end())
{
// if id was not found, then we have a new object
m_objects.push_back(new ElasticObject());
const unsigned int groupIndex = (unsigned int)(m_objects.size()-1);
obj2group[objId] = groupIndex;
const unsigned int i0 = m_current_to_initial_index[i];
m_objects[groupIndex]->m_particleIndices.push_back(i0);
}
else
{
// object already exists
const unsigned int groupIndex = obj2group[objId];
const unsigned int i0 = m_current_to_initial_index[i];
m_objects[groupIndex]->m_particleIndices.push_back(i0);
}
}
// For each object sort the particles so that all fixed particles are at the end of the list.
// This is needed for the factorization to exclude fixed particles.
for (size_t groupIndex = 0; groupIndex < m_objects.size(); groupIndex++)
{
struct Comparator {
Comparator(Elasticity_Kugelstadt2021* _this) : m_this(_this) {};
bool operator()(unsigned int a, unsigned int b)
{
if ((m_this->m_model->getParticleState(a) != ParticleState::Active) && (m_this->m_model->getParticleState(b) == ParticleState::Active))
return false;
else if ((m_this->m_model->getParticleState(a) == ParticleState::Active) && (m_this->m_model->getParticleState(b) != ParticleState::Active))
return true;
else if ((m_this->m_model->getParticleState(a) != ParticleState::Active) && (m_this->m_model->getParticleState(b) != ParticleState::Active))
return a < b;
else
return a < b;
}
Elasticity_Kugelstadt2021* m_this;
};
std::sort(m_objects[groupIndex]->m_particleIndices.begin(), m_objects[groupIndex]->m_particleIndices.end(), Comparator(this));
m_objects[groupIndex]->m_nFixed = 0;
for (size_t i = 0; i < m_objects[groupIndex]->m_particleIndices.size(); i++)
{
if (m_model->getParticleState(m_objects[groupIndex]->m_particleIndices[i]) != ParticleState::Active)
m_objects[groupIndex]->m_nFixed++;
}
LOG_INFO << "Object " << groupIndex << " - fixed particles: " << m_objects[groupIndex]->m_nFixed;
}
}
void Elasticity_Kugelstadt2021::initSystem()
{
Simulation *sim = Simulation::getCurrent();
const unsigned int fluidModelIndex = m_model->getPointSetIndex();
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
// Compute Lamé parameters
m_mu = m_youngsModulus / (static_cast<Real>(2.0) * (static_cast<Real>(1.0) + m_poissonRatio));
m_lambda = m_youngsModulus * m_poissonRatio / ((static_cast<Real>(1.0) + m_poissonRatio) * (static_cast<Real>(1.0) - static_cast<Real>(2.0) * m_poissonRatio));
FluidModel *model = m_model;
size_t numObjects = m_objects.size();
auto& fluidInfos = sim->getFluidInfos();
for (auto objIndex = 0; objIndex < numObjects; objIndex++)
{
ElasticObject* obj = m_objects[objIndex];
// compute MD5 check sum
std::string md5 = computeMD5(objIndex);
// check if object with same md5 already exists
bool foundFactorization = false;
for (size_t i = 0; i < objIndex; i++)
{
// reuse the factorization for all objects with the same particle sampling to reduce the memory consumption
if (fluidInfos[objIndex].hasSameParticleSampling(fluidInfos[i]))
{
m_objects[objIndex]->m_factorization = m_objects[i]->m_factorization;
foundFactorization = true;
LOG_INFO << "Object " << objIndex << " is using the factorization of object " << i;
break;
}
}
// if no factorization was found, create a new one
if (obj->m_factorization == nullptr)
obj->m_factorization = std::make_shared<Factorization>();
// generate file name for cache file
std::string baseName = Utilities::FileSystem::getFileName(fluidInfos[objIndex].samplesFile);
std::string ext = Utilities::FileSystem::getFileExt(fluidInfos[objIndex].samplesFile);
std::string cacheFileName = sim->getCachePath() + "/" + baseName + "_" + ext +
"_" + std::to_string(fluidInfos[objIndex].mode) +
"_" + md5 + "_" + Utilities::StringTools::real2String(dt) +
"_" + Utilities::StringTools::real2String(m_mu) +
"_" + Utilities::StringTools::real2String(m_alpha) +
".bin";
// Fluid block
if (fluidInfos[objIndex].type == 0)
{
Vector3r diag = (fluidInfos[objIndex].box.max() - fluidInfos[objIndex].box.min());
cacheFileName = sim->getCachePath() + "/Block_" +
Utilities::StringTools::real2String(diag.x()) + "_" + Utilities::StringTools::real2String(diag.y()) + "_" + Utilities::StringTools::real2String(diag.z()) +
"_" + std::to_string(fluidInfos[objIndex].mode) +
"_" + md5 + "_" + Utilities::StringTools::real2String(dt) +
"_" + Utilities::StringTools::real2String(m_mu) +
"_" + Utilities::StringTools::real2String(m_alpha) +
".bin";
}
// check if cache file exists
const bool foundCacheFile = Utilities::FileSystem::fileExists(cacheFileName);
if (sim->getUseCache() && foundCacheFile)
{
// if factorization cannot be reused from another object and a cache file was found, load the cache file
if (!foundFactorization)
{
LOG_INFO << "Read cached factorization: " << cacheFileName;
BinaryFileReader binReader;
binReader.openFile(cacheFileName);
binReader.readSparseMatrix(obj->m_factorization->m_D);
binReader.readSparseMatrix(obj->m_factorization->m_DT_K);
binReader.read(obj->m_factorization->m_dt);
binReader.read(obj->m_factorization->m_mu);
binReader.readSparseMatrix(obj->m_factorization->m_matHTH);
#ifdef USE_AVX
delete obj->m_factorization->m_cholesky;
obj->m_factorization->m_cholesky = new CholeskyAVXSolver();
obj->m_factorization->m_cholesky->load(binReader);
#else
binReader.readSparseMatrix(obj->m_factorization->m_matL);
binReader.readSparseMatrix(obj->m_factorization->m_matLT);
binReader.readMatrixX(obj->m_factorization->m_permInd);
binReader.readMatrixX(obj->m_factorization->m_permInvInd);
#endif
binReader.closeFile();
}
// init vectors
int numParticles = (int)obj->m_particleIndices.size();
obj->m_RHS.resize(numParticles - obj->m_nFixed);
#ifdef USE_AVX
obj->m_rhs.resize(3 * numParticles - obj->m_nFixed);
obj->m_sol.resize(3 * numParticles - obj->m_nFixed);
obj->m_f_avx.resize(3 * numParticles);
obj->m_sol_avx.resize(numParticles);
int vecSize;
if (numParticles % 8 == 0) vecSize = numParticles / 8;
else vecSize = numParticles / 8 + 1;
obj->m_quats_avx.resize(vecSize);
for (int i = 0; i < vecSize; i++)
obj->m_quats_avx[i] = Quaternion8f();
#else
obj->m_f.resize(3 * numParticles);
obj->m_sol.resize(numParticles);
obj->m_quats.resize(numParticles, Quaternionr::Identity());
obj->m_RHS_perm.resize(numParticles - obj->m_nFixed);
#endif
}
else // no cache found
{
ElasticObject* obj = m_objects[objIndex];
// compute new factorization if no factorization can be reused
if (!foundFactorization)
initFactorization(obj->m_factorization, obj->m_particleIndices, obj->m_nFixed, dt, m_mu);
// init vectors
int numParticles = (int)obj->m_particleIndices.size();
obj->m_RHS.resize(numParticles - obj->m_nFixed);
#ifdef USE_AVX
obj->m_rhs.resize(3 * numParticles - obj->m_nFixed);
obj->m_sol.resize(3 * numParticles - obj->m_nFixed);
obj->m_f_avx.resize(3 * numParticles);
obj->m_sol_avx.resize(numParticles);
int vecSize;
if (numParticles % 8 == 0) vecSize = numParticles / 8;
else vecSize = numParticles / 8 + 1;
obj->m_quats_avx.resize(vecSize);
for (int i = 0; i < vecSize; i++)
obj->m_quats_avx[i] = Quaternion8f();
#else
obj->m_f.resize(3 * numParticles);
obj->m_sol.resize(numParticles);
obj->m_quats.resize(numParticles, Quaternionr::Identity());
obj->m_RHS_perm.resize(numParticles - obj->m_nFixed);
#endif
// write cache file
if (sim->getUseCache() && (Utilities::FileSystem::makeDir(sim->getCachePath()) == 0))
{
BinaryFileWriter binWriter;
binWriter.openFile(cacheFileName);
binWriter.writeSparseMatrix(obj->m_factorization->m_D);
binWriter.writeSparseMatrix(obj->m_factorization->m_DT_K);
binWriter.write(obj->m_factorization->m_dt);
binWriter.write(obj->m_factorization->m_mu);
binWriter.writeSparseMatrix(obj->m_factorization->m_matHTH);
#ifdef USE_AVX
obj->m_factorization->m_cholesky->save(binWriter);
#else
binWriter.writeSparseMatrix(obj->m_factorization->m_matL);
binWriter.writeSparseMatrix(obj->m_factorization->m_matLT);
binWriter.writeMatrixX(obj->m_factorization->m_permInd);
binWriter.writeMatrixX(obj->m_factorization->m_permInvInd);
#endif
binWriter.closeFile();
}
}
obj->m_md5 = md5;
}
}
void Elasticity_Kugelstadt2021::initFactorization(std::shared_ptr<Factorization> factorization, std::vector<unsigned int>& particleIndices, const unsigned int nFixed, const Real dt, const Real mu)
{
Simulation* sim = Simulation::getCurrent();
const unsigned int fluidModelIndex = m_model->getPointSetIndex();
factorization->m_dt = dt;
factorization->m_mu = mu;
// init mapping to find the particle indices in the current particle group (object)
std::vector<unsigned int> groupInv;
groupInv.resize(m_model->numActiveParticles());
int numParticles = (int)particleIndices.size();
std::vector<unsigned int>& group = particleIndices;
for (int i = 0; i < numParticles; i++)
groupInv[group[i]] = i;
// determine total number of neighbors
int totalNeighbors = 0;
for (int i = 0; i < (int)numParticles; i++)
{
int particleIndex = group[i];
const int numNeighbors = (int) m_initialNeighbors[i].size();
totalNeighbors += numNeighbors;
}
// init triplets for matrices D, K, M
std::vector<Eigen::Triplet<double>> triplets_D;
std::vector<Eigen::Triplet<double>> triplets_K;
std::vector<Eigen::Triplet<double>> triplets_M;
triplets_D.reserve(2 * 3 * totalNeighbors);
triplets_K.reserve(3 * numParticles);
triplets_M.reserve(numParticles);
std::vector<Eigen::Triplet<double>> triplets_H;
triplets_H.reserve(totalNeighbors + numParticles);
std::vector<Eigen::Triplet<double>> triplets_K2;
triplets_K2.reserve(totalNeighbors);
const double dtd = dt;
const double mud = mu;
const double alphad = m_alpha;
// init matrices D and K
unsigned int row_index = 0;
for (int i = 0; i < (int)numParticles; i++)
{
int particleIndex0 = group[i];
const unsigned int i0 = particleIndex0;
unsigned int particleIndex = m_initial_to_current_index[i0];
const double restVolumes_id = m_restVolumes[i];
// init matrix K according to Eq. 13 in the paper
// Set triplets for the matrix K. Directly multiply the factor 2*dt*dt into K
const double Kreal = 2.0 * dtd * dtd * mud * restVolumes_id;
// init mass matrix
// all particles have the same mass => M = mass*I
triplets_M.push_back(Eigen::Triplet<double>(i, i, m_model->getMass(particleIndex)));
for (int j = 0; j < 3; j++)
triplets_K.push_back(Eigen::Triplet<double>(3 * i + j, 3 * i + j, Kreal));
const Eigen::Vector3d xi0d = m_model->getPosition0(i0).cast<double>();
const size_t numNeighbors = m_initialNeighbors[i0].size();
for (unsigned int j = 0; j < numNeighbors; j++)
{
const unsigned int neighborIndex0 = m_initialNeighbors[i0][j];
const unsigned int neighborIndex = m_initial_to_current_index[neighborIndex0];
const Eigen::Vector3d xj0d = m_model->getPosition0(neighborIndex0).cast<double>();
const Eigen::Vector3d correctedKernelGradient = m_L[particleIndex].cast<double>() * sim->gradW((xi0d - xj0d).cast<Real>()).cast<double>();
const double restVolumes_jd = m_restVolumes[neighborIndex];
const Eigen::Vector3d y = restVolumes_jd * correctedKernelGradient;
// init matrix D according to Eq. 3 and 4 in the supplemental document of the paper
triplets_D.push_back(Eigen::Triplet<double>(3 * i + 0, i, -y[0]));
triplets_D.push_back(Eigen::Triplet<double>(3 * i + 1, i, -y[1]));
triplets_D.push_back(Eigen::Triplet<double>(3 * i + 2, i, -y[2]));
triplets_D.push_back(Eigen::Triplet<double>(3 * i + 0, groupInv[neighborIndex0], y[0]));
triplets_D.push_back(Eigen::Triplet<double>(3 * i + 1, groupInv[neighborIndex0], y[1]));
triplets_D.push_back(Eigen::Triplet<double>(3 * i + 2, groupInv[neighborIndex0], y[2]));
double sum = 0.0;
const Eigen::Vector3d xi_xj_0 = xi0d - xj0d;
const double xixj0_l2 = xi_xj_0.squaredNorm();
const double beta = 1.0;
// init matrix \tilde K * dt^2 according to Eq. 2 in the paper
const double Kreal2 = alphad * dtd * dtd * mud * static_cast<double>(m_restVolumes[i]) * static_cast<double>(m_restVolumes[neighborIndex]) * (sim->W(xi_xj_0.cast<Real>()) / xixj0_l2);
triplets_K2.push_back(Eigen::Triplet<double>(row_index, row_index, Kreal2));
// init matrix H according to Eq. 5, 6 and 7 in the supplemental document of the paper
for (unsigned int k = 0; k < numNeighbors; k++)
{
const unsigned int kIndex0 = m_initialNeighbors[i0][k];
const unsigned int kIndex = m_initial_to_current_index[kIndex0];
const Eigen::Vector3d xk0d = m_model->getPosition0(kIndex0).cast<double>();
const Eigen::Vector3d correctedKernelGradientd = m_L[particleIndex].cast<double>() * sim->gradW((xi0d - xk0d).cast<Real>()).cast<double>();
const double restVolumes_kd = m_restVolumes[kIndex];
const double H3j = beta * restVolumes_kd * correctedKernelGradientd.dot(xi0d - xj0d);
triplets_H.push_back(Eigen::Triplet<double>(row_index, groupInv[kIndex0], H3j));
sum -= H3j;
if (k == j)
{
triplets_H.push_back(Eigen::Triplet<double>(row_index, groupInv[kIndex0], beta));
}
}
triplets_H.push_back(Eigen::Triplet<double>(row_index, i, sum - beta));
row_index++;
}
}
Eigen::SparseMatrix<double> D(3 * numParticles, numParticles);
factorization->m_D.resize(3 * numParticles, numParticles);
D.setFromTriplets(triplets_D.begin(), triplets_D.end());
factorization->m_D = D.cast<Real>();
//set matrices
Eigen::SparseMatrix<double> K(3 * numParticles, 3 * numParticles); // actually 2 * dt* dt * K
K.setFromTriplets(triplets_K.begin(), triplets_K.end());
Eigen::SparseMatrix<double> M(numParticles, numParticles);
M.setFromTriplets(triplets_M.begin(), triplets_M.end());
Eigen::SparseMatrix<double> DT_K(numParticles, 3 * numParticles);
factorization->m_DT_K.resize(numParticles, 3 * numParticles);
DT_K = D.transpose() * K;
factorization->m_DT_K = DT_K.cast<Real>();
Eigen::SparseMatrix<double> K2(totalNeighbors, totalNeighbors);
K2.setFromTriplets(triplets_K2.begin(), triplets_K2.end());
Eigen::SparseMatrix<double> H(totalNeighbors, numParticles);
H.setFromTriplets(triplets_H.begin(), triplets_H.end());
Eigen::SparseMatrix<double> HTH(numParticles, numParticles);
factorization->m_matHTH.resize(numParticles, numParticles);
HTH = H.transpose() * K2 * H;
factorization->m_matHTH = HTH.cast<Real>();
LOG_INFO << "Non zero elements (H^T * K * H): " << factorization->m_matHTH.nonZeros();
Eigen::SparseMatrix<double> DT_K_D = DT_K * D;
Eigen::SparseMatrix<double> M_plus_DT_K_D;
// init linear system matrix according to Eq. 29 in the paper
if (m_alpha != 0.0)
M_plus_DT_K_D = (M + DT_K_D + HTH).block(0, 0, numParticles - nFixed, numParticles - nFixed);
else // no zero energy mode control
M_plus_DT_K_D = (M + DT_K_D).block(0, 0, numParticles - nFixed, numParticles - nFixed);
M_plus_DT_K_D.makeCompressed();
#ifdef USE_AVX
// compute factorization of the matrix
delete factorization->m_cholesky;
factorization->m_cholesky = new CholeskyAVXSolver(M_plus_DT_K_D);
#else
SolverLLT* solverLLT = new SolverLLT();
solverLLT->compute(M_plus_DT_K_D);
if (solverLLT->info() != Eigen::Success)
{
LOG_ERR << "Cholesky decomposition failed.";
LOG_ERR << solverLLT->info();
return;
}
factorization->m_permInd = solverLLT->permutationP().indices();
factorization->m_permInvInd = solverLLT->permutationPinv().indices();
factorization->m_matL = Eigen::SparseMatrix<Real, Eigen::ColMajor>(solverLLT->matrixL().cast<Real>());
factorization->m_matLT = Eigen::SparseMatrix<Real, Eigen::ColMajor>(solverLLT->matrixU().cast<Real>());
delete solverLLT;
#endif
LOG_INFO << "Non zero elements (A): " << M_plus_DT_K_D.nonZeros();
}
void Elasticity_Kugelstadt2021::step()
{
// apply accelerations
const unsigned int numParticles = m_model->numActiveParticles();
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
if (m_model->getParticleState(i) == ParticleState::Active)
{
Vector3r& vel = m_model->getVelocity(i);
vel += dt * m_model->getAcceleration(i);
m_model->getAcceleration(i).setZero();
}
}
}
START_TIMING("Elasticity")
stepElasticitySolver(); // elasticity solver using the factorization
stepVolumeSolver(); // volume solver using a cg method
STOP_TIMING_AVG
}
void Elasticity_Kugelstadt2021::reset()
{
initValues();
}
void Elasticity_Kugelstadt2021::performNeighborhoodSearchSort()
{
const unsigned int numPart = m_model->numActiveParticles();
if (numPart == 0)
return;
Simulation *sim = Simulation::getCurrent();
auto const& d = sim->getNeighborhoodSearch()->point_set(m_model->getPointSetIndex());
d.sort_field(&m_rotations[0]);
d.sort_field(&m_current_to_initial_index[0]);
d.sort_field(&m_L[0]);
d.sort_field(&m_restVolumes[0]);
d.sort_field(&m_vDiff[0]);
for (unsigned int i = 0; i < numPart; i++)
m_initial_to_current_index[m_current_to_initial_index[i]] = i;
#ifdef USE_AVX
// update quaterions which are needed for warmstart
rotationMatricesToAVXQuaternions();
#endif
}
void Elasticity_Kugelstadt2021::computeMatrixL()
{
Simulation *sim = Simulation::getCurrent();
const unsigned int numParticles = m_model->numActiveParticles();
const unsigned int fluidModelIndex = m_model->getPointSetIndex();
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = m_current_to_initial_index[i];
const Vector3r &xi0 = m_model->getPosition0(i0);
Matrix3r L;
L.setZero();
const size_t numNeighbors = m_initialNeighbors[i0].size();
// Fluid
for (unsigned int j = 0; j < numNeighbors; j++)
{
// get initial neighbor index considering the current particle order
const unsigned int neighborIndex0 = m_initialNeighbors[i0][j];
const unsigned int neighborIndex = m_initial_to_current_index[neighborIndex0];
const Vector3r &xj0 = m_model->getPosition0(neighborIndex0);
const Vector3r xj_xi_0 = xj0 - xi0;
const Vector3r gradW = sim->gradW(xj_xi_0);
// minus because gradW(xij0) == -gradW(xji0)
L -= m_restVolumes[neighborIndex] * gradW * xj_xi_0.transpose();
}
// add 1 to z-component. otherwise we get a singular matrix in 2D
if (sim->is2DSimulation())
L(2, 2) = 1.0;
bool invertible = false;
L.computeInverseWithCheck(m_L[i], invertible, static_cast<Real>(1e-9));
if (!invertible)
{
LOG_INFO << "Matrix not invertible.";
MathFunctions::pseudoInverse(L, m_L[i]);
//m_L[i] = Matrix3r::Identity();
}
}
}
}
void Elasticity_Kugelstadt2021::saveState(BinaryFileWriter &binWriter)
{
binWriter.writeBuffer((char*)m_current_to_initial_index.data(), m_current_to_initial_index.size() * sizeof(unsigned int));
binWriter.writeBuffer((char*)m_initial_to_current_index.data(), m_initial_to_current_index.size() * sizeof(unsigned int));
binWriter.writeBuffer((char*)m_L.data(), m_L.size() * sizeof(Matrix3r));
binWriter.writeBuffer((char*)m_rotations.data(), m_rotations.size() * sizeof(Matrix3r));
binWriter.writeBuffer((char*)m_vDiff.data(), m_vDiff.size() * sizeof(Vector3r));
}
void Elasticity_Kugelstadt2021::loadState(BinaryFileReader &binReader)
{
binReader.readBuffer((char*)m_current_to_initial_index.data(), m_current_to_initial_index.size() * sizeof(unsigned int));
binReader.readBuffer((char*)m_initial_to_current_index.data(), m_initial_to_current_index.size() * sizeof(unsigned int));
binReader.readBuffer((char*)m_L.data(), m_L.size() * sizeof(Matrix3r));
binReader.readBuffer((char*)m_rotations.data(), m_rotations.size() * sizeof(Matrix3r));
binReader.readBuffer((char*)m_vDiff.data(), m_vDiff.size() * sizeof(Vector3r));
#ifdef USE_AVX
// update quaternions which are needed for warmstart
rotationMatricesToAVXQuaternions();
#endif
}
void Elasticity_Kugelstadt2021::stepVolumeSolver()
{
const unsigned int numParticles = m_model->numActiveParticles();
if (numParticles == 0)
return;
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
// extract current rotations
START_TIMING("computeRotations");
computeRotations();
STOP_TIMING_AVG;
// precompute some values to improve performance
START_TIMING("precomputeValues");
precomputeValues();
STOP_TIMING_AVG;
START_TIMING("Volume solver")
// Init linear system solver
MatrixReplacement A(3 * m_model->numActiveParticles(), matrixVecProd, (void*)this);
m_solver.setTolerance(m_maxErrorV);
m_solver.setMaxIterations(m_maxIterV);
m_solver.compute(A);
VectorXr b(3 * numParticles);
VectorXr x(3 * numParticles);
VectorXr g(3 * numParticles);
// Compute right hand side of the system in Eq. 30
computeRHS(b);
// warmstart
#pragma omp parallel for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
if (m_model->getParticleState(i) == ParticleState::Active)
g.segment<3>(3 * i) = m_model->getVelocity(i) + m_vDiff[i];
else
{
b.segment<3>(3 * i) = m_model->getVelocity(i);
g.segment<3>(3 * i) = m_model->getVelocity(i);
}
}
// Solve linear system
START_TIMING("Volume - CG solve");
x = m_solver.solveWithGuess(b, g);
m_iterationsV = (int)m_solver.iterations();
STOP_TIMING_AVG;
INCREASE_COUNTER("Elasticity - CG iterations", static_cast<Real>(m_iterationsV));
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
if (m_model->getParticleState(i) == ParticleState::Active)
{
Vector3r& vi = m_model->getVelocity(i);
const Vector3r new_vi = x.segment<3>(3 * i);
m_vDiff[i] = new_vi - vi;
vi = new_vi;
}
}
}
STOP_TIMING_AVG;
}
#ifdef USE_AVX
void Elasticity_Kugelstadt2021::rotationMatricesToAVXQuaternions()
{
size_t numObjects = m_objects.size();
for (auto objIndex = 0; objIndex < numObjects; objIndex++)
{
ElasticObject* obj = m_objects[objIndex];
const std::vector<unsigned int>& group = obj->m_particleIndices;
int numParticles = (int)group.size();
int vecSize;
if (numParticles % 8 == 0)
vecSize = numParticles / 8;
else
vecSize = numParticles / 8 + 1;
auto& quats = obj->m_quats_avx;
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < vecSize; i++)
{
const int count = std::min(numParticles - i * 8, 8);
// store the rotation matrices of 8 particles in avx vectors
int idx[8];
for (int j = 0; j < count; j++)
idx[j] = m_initial_to_current_index[group[8 * i + j]];
for (int j = count; j < 8; j++)
idx[j] = 0;
Quaternionr q[8];
for (auto j = 0; j < count; j++)
q[j] = Quaternionr(m_rotations[idx[j]]).normalized();
for (auto j = count; j < 8; j++)
q[j] = Quaternionr(1,0,0,0);
Quaternion8f& q_avx = quats[i];
q_avx.set(q);
}
}
}
}
void Elasticity_Kugelstadt2021::computeRotations()
{
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
size_t numObjects = m_objects.size();
for (auto objIndex = 0; objIndex < numObjects; objIndex++)
{
ElasticObject* obj = m_objects[objIndex];
const std::vector<unsigned int>& group = obj->m_particleIndices;
int numParticles = (int)group.size();
int vecSize;
if (numParticles % 8 == 0)
vecSize = numParticles / 8;
else
vecSize = numParticles / 8 + 1;
auto& D = obj->m_factorization->m_D;
auto& f_avx = obj->m_f_avx;
auto& sol_avx = obj->m_sol_avx;
auto& quats = obj->m_quats_avx;
// advect particles
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Vector3r& xi0 = m_model->getPosition0(i0);
const size_t numNeighbors = m_initialNeighbors[i0].size();
Vector3r xNew = m_model->getPosition(particleIndex);
xNew += dt * m_model->getVelocity(particleIndex);
// copy the 3 coordinates to sol_avx
sol_avx[i] = Scalarf8(xNew[0], xNew[1], xNew[2], 0, 0, 0, 0, 0);
f_avx[3 * i].setZero();
f_avx[3 * i + 1].setZero();
f_avx[3 * i + 2].setZero();
}
// compute sparse matrix-vector product in parallel
#pragma omp for schedule(static)
for (int k = 0; k < D.outerSize(); ++k)
{
for (Eigen::SparseMatrix<float, Eigen::RowMajor>::InnerIterator it(D, k); it; ++it)
{
f_avx[it.row()] += Scalarf8(it.value()) * sol_avx[it.col()];
}
}
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
// copy data from f_avx to m_F
float x0[8], x1[8], x2[8];
f_avx[3 * i].store(x0);
f_avx[3 * i + 1].store(x1);
f_avx[3 * i + 2].store(x2);
m_F[particleIndex] << x0[0], x0[1], x0[2],
x1[0], x1[1], x1[2],
x2[0], x2[1], x2[2];
}
#pragma omp for schedule(static)
for (int i = 0; i < vecSize; i++)
{
const int count = std::min(numParticles - i * 8, 8);
// store the deformation gradient of 8 particles in avx vectors
int idx[8];
for (int j = 0; j < count; j++)
idx[j] = m_initial_to_current_index[group[8 * i + j]];
for (int j = count; j < 8; j++)
idx[j] = 0;
Vector3f8 F1, F2, F3; //columns of the deformation gradient
F1 = Vector3f8(m_F[idx[0]].col(0), m_F[idx[1]].col(0), m_F[idx[2]].col(0), m_F[idx[3]].col(0), m_F[idx[4]].col(0), m_F[idx[5]].col(0), m_F[idx[6]].col(0), m_F[idx[7]].col(0));
F2 = Vector3f8(m_F[idx[0]].col(1), m_F[idx[1]].col(1), m_F[idx[2]].col(1), m_F[idx[3]].col(1), m_F[idx[4]].col(1), m_F[idx[5]].col(1), m_F[idx[6]].col(1), m_F[idx[7]].col(1));
F3 = Vector3f8(m_F[idx[0]].col(2), m_F[idx[1]].col(2), m_F[idx[2]].col(2), m_F[idx[3]].col(2), m_F[idx[4]].col(2), m_F[idx[5]].col(2), m_F[idx[6]].col(2), m_F[idx[7]].col(2));
// perform analytic polar decomposition
Quaternion8f& q = quats[i];
MathFunctions_AVX::APD_Newton_AVX(F1, F2, F3, q);
//transform quaternion to rotation matrix
Vector3f8 R1, R2, R3; //columns of the rotation matrix
quats[i].toRotationMatrix(R1, R2, R3);
std::array<Vector3r, 8> r0, r1, r2;
R1.store(r0.data());
R2.store(r1.data());
R3.store(r2.data());
for (auto j = 0; j < count; j++)
{
m_rotations[idx[j]].col(0) = r0[j];
m_rotations[idx[j]].col(1) = r1[j];
m_rotations[idx[j]].col(2) = r2[j];
}
}
}
}
}
void Elasticity_Kugelstadt2021::stepElasticitySolver()
{
START_TIMING("Elasticity_Kugelstadt2021")
const unsigned int numActiveParticles = m_model->numActiveParticles();
if (numActiveParticles == 0)
return;
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
Simulation* sim = Simulation::getCurrent();
size_t numObjects = m_objects.size();
// solve the systems for each object separately
for (auto objIndex = 0; objIndex < numObjects; objIndex++)
{
ElasticObject* obj = m_objects[objIndex];
const std::vector<unsigned int>& group = obj->m_particleIndices;
int numParticles = (int)group.size();
auto& D = obj->m_factorization->m_D;
auto& DT_K = obj->m_factorization->m_DT_K;
auto& HT_K_H = obj->m_factorization->m_matHTH;
auto& RHS = obj->m_RHS;
auto& f_avx = obj->m_f_avx;
auto& sol_avx = obj->m_sol_avx;
auto& quats = obj->m_quats_avx;
START_TIMING("advect x & Dx")
#pragma omp parallel default(shared)
{
// advect particles to get \tilde x in Eq. 29:
// store the 3 components of the advected positions in f_avx
// store the 3 components of dt*velocity in v_avx
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const size_t numNeighbors = m_initialNeighbors[i0].size();
const Real fdt = obj->m_factorization->m_dt;
const Vector3r x = m_model->getPosition(particleIndex);
const Vector3r dv = fdt * m_model->getVelocity(particleIndex);
const Vector3r xNew = x + dv;
// copy the 3 coordinates to sol_avx
sol_avx[i] = Scalarf8(xNew[0], xNew[1], xNew[2], 0, 0, 0, 0, 0);
f_avx[3 * i].setZero();
f_avx[3 * i + 1].setZero();
f_avx[3 * i + 2].setZero();
}
// compute deformation gradient F by sparse matrix-vector product in parallel:
//
// f_avx = D * x_advected (Eq. 12 and Eq. 29)
#pragma omp for schedule(static)
for (int k = 0; k < D.outerSize(); ++k)
{
for (Eigen::SparseMatrix<float, Eigen::RowMajor>::InnerIterator it(D, k); it; ++it)
{
f_avx[it.row()] += Scalarf8(it.value()) * sol_avx[it.col()];
}
}
}
STOP_TIMING_AVG;
int vecSize;
if (numParticles % 8 == 0)
vecSize = numParticles / 8;
else
vecSize = numParticles / 8 + 1;
START_TIMING("extract rot");
#pragma omp parallel default(shared)
{
// extract deformation gradient from avx values f_avx
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
// copy data from f_avx to m_F
float x0[8], x1[8], x2[8];
f_avx[3*i].store(x0);
f_avx[3*i+1].store(x1);
f_avx[3*i+2].store(x2);
m_F[particleIndex] << x0[0], x0[1], x0[2],
x1[0], x1[1], x1[2],
x2[0], x2[1], x2[2];
}
// extract rotation from F by analytic polar decomposition (Kugelstadt et al. 2018)
#pragma omp for schedule(static)
for (int i = 0; i < vecSize; i++)
{
const int count = std::min(numParticles - i*8, 8);
// store the deformation gradient of 8 particles in avx vectors
int idx[8];
for (int j=0; j < count; j++)
idx[j] = m_initial_to_current_index[group[8*i + j]];
for (int j = count; j < 8; j++)
idx[j] = 0;
Vector3f8 F1, F2, F3; //columns of the deformation gradient
F1 = Vector3f8(m_F[idx[0]].col(0), m_F[idx[1]].col(0), m_F[idx[2]].col(0), m_F[idx[3]].col(0), m_F[idx[4]].col(0), m_F[idx[5]].col(0), m_F[idx[6]].col(0), m_F[idx[7]].col(0));
F2 = Vector3f8(m_F[idx[0]].col(1), m_F[idx[1]].col(1), m_F[idx[2]].col(1), m_F[idx[3]].col(1), m_F[idx[4]].col(1), m_F[idx[5]].col(1), m_F[idx[6]].col(1), m_F[idx[7]].col(1));
F3 = Vector3f8(m_F[idx[0]].col(2), m_F[idx[1]].col(2), m_F[idx[2]].col(2), m_F[idx[3]].col(2), m_F[idx[4]].col(2), m_F[idx[5]].col(2), m_F[idx[6]].col(2), m_F[idx[7]].col(2));
// perform polar decomposition
Quaternion8f& q = quats[i];
MathFunctions_AVX::APD_Newton_AVX(F1, F2, F3, q);
//transform quaternion to rotation matrix
Vector3f8 R1, R2, R3; //columns of the rotation matrix
quats[i].toRotationMatrix(R1, R2, R3);
// R := R-F
R1 -= F1;
R2 -= F2;
R3 -= F3;
// store result in f_avx
// f_avx has size 3*n and 3 rows contain F-R
std::array<Vector3r, 8> v0, v1, v2;
R1.store(v0.data());
R2.store(v1.data());
R3.store(v2.data());
for (auto j = 0; j < count; j++)
{
f_avx[24*i + 3*j] = Scalarf8(v0[j][0], v1[j][0], v2[j][0], 0, 0, 0, 0, 0);
f_avx[24*i + 3*j + 1] = Scalarf8(v0[j][1], v1[j][1], v2[j][1], 0, 0, 0, 0, 0);
f_avx[24*i + 3*j + 2] = Scalarf8(v0[j][2], v1[j][2], v2[j][2], 0, 0, 0, 0, 0);
if (8*i+j < RHS.size())
RHS[8*i+j] = Scalarf8(0.0f);
}
}
}
STOP_TIMING_AVG
// Compute right hand side
START_TIMING("rhs")
#pragma omp parallel default(shared)
{
// Compute D^T K * (R-F) (Eq. 29)
// Note: K already contains the factor: 2 dt^2
#pragma omp for schedule(static)
for (int k = 0; k < DT_K.outerSize(); ++k)
{
for (Eigen::SparseMatrix<float, Eigen::RowMajor>::InnerIterator it(DT_K, k); it; ++it)
{
if (it.row() < (int) RHS.size())
RHS[it.row()] += Scalarf8(it.value()) * f_avx[it.col()];
}
}
}
// If zero energy model control is turned on,
// add the following term to the right hand side (Eq. 29):
// H^T * K2 * H * x_advected
// Note: K2 already contains the factor: dt^2
if (m_alpha != 0.0)
{
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int k = 0; k < HT_K_H.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::InnerIterator it(HT_K_H, k); it; ++it)
{
if (it.col() < (int)RHS.size())
RHS[it.col()] -= Scalarf8(static_cast<float>(it.value())) * sol_avx[it.row()];
}
}
}
}
// Copy the right hand side in a vector for the cholesky solver
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)RHS.size(); i++)
{
float x[8];
RHS[i].store(x);
obj->m_rhs[3 * i] = x[0];
obj->m_rhs[3 * i + 1] = x[1];
obj->m_rhs[3 * i + 2] = x[2];
}
}
STOP_TIMING_AVG;
}
// Solve linear system
START_TIMING("solve SLE")
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < 3*numObjects; i++)
{
int objIndex = i / 3;
int index = i % 3;
ElasticObject* obj = m_objects[objIndex];
obj->m_factorization->m_cholesky->solve(&obj->m_sol[index], &obj->m_rhs[index], /* stride = */ 3);
}
}
for (auto objIndex = 0; objIndex < numObjects; objIndex++)
{
ElasticObject* obj = m_objects[objIndex];
const std::vector<unsigned int>& group = obj->m_particleIndices;
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int) obj->m_sol.size()/3; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
if (m_model->getParticleState(particleIndex) == ParticleState::Active)
{
Vector3r& vi = m_model->getVelocity(particleIndex);
const Vector3r &dx = obj->m_sol.segment<3>(3*i);
const Real fdt = obj->m_factorization->m_dt;
vi += (1.0 / fdt) * dx;
}
}
}
}
STOP_TIMING_AVG
STOP_TIMING_AVG
}
void Elasticity_Kugelstadt2021::matrixVecProd(const Real* vec, Real *result, void *userData)
{
Simulation *sim = Simulation::getCurrent();
Elasticity_Kugelstadt2021* elasticity = static_cast<Elasticity_Kugelstadt2021*>(userData);
FluidModel *model = elasticity->getModel();
const unsigned int numParticles = model->numActiveParticles();
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
const auto ¤t_to_initial_index = elasticity->m_current_to_initial_index;
const auto &initial_to_current_index = elasticity->m_initial_to_current_index;
const auto &initialNeighbors = elasticity->m_initialNeighbors;
const auto &restVolumes = elasticity->m_restVolumes;
auto &stress = elasticity->m_stress;
const auto& precomp_RL_gradW8 = elasticity->m_precomp_RL_gradW8;
const auto& precomp_RLj_gradW8 = elasticity->m_precomp_RLj_gradW8;
const auto& precomputed_indices8 = elasticity->m_precomputed_indices8;
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = current_to_initial_index[i];
const Vector3r& pi = Eigen::Map<const Vector3r>(&vec[3 * i], 3);
const unsigned int numNeighbors = (unsigned int)initialNeighbors[i0].size();
const Vector3f8 pi_avx(pi);
// compute corotated deformation gradient
Scalarf8 trace_avx;
trace_avx.setZero();
for (unsigned int j = 0; j < numNeighbors; j += 8)
{
const unsigned int count = std::min(numNeighbors - j, 8u);
unsigned int nIndices[8];
for (auto k = 0u; k < count; k++)
nIndices[k] = initial_to_current_index[initialNeighbors[i0][j + k]];
const Vector3f8 pj_avx = convertVec_zero(nIndices, &vec[0], count);
const Vector3f8 pj_pi = pj_avx - pi_avx;
const Vector3f8& correctedRotatedKernel = precomp_RL_gradW8[precomputed_indices8[i] + j / 8];
// We need the trace of the strain tensor. Therefore, we are only interested in the
// diagonal elements which are F_ii-1. However, instead of computing the trace
// of a dyadic product, we can determine the dot product of the vectors to get the trace
// of F. Then the trace of the strain is the result minus 3.
trace_avx += pj_pi.dot(correctedRotatedKernel);
}
Real trace = trace_avx.reduce();
trace *= dt;
// First Piola Kirchhoff stress = lambda trace(epsilon) I
stress[i] = elasticity->m_lambda * trace;
}
}
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
if (model->getParticleState(i) == ParticleState::Active)
{
const unsigned int i0 = current_to_initial_index[i];
const unsigned int numNeighbors = (unsigned int) initialNeighbors[i0].size();
const Scalarf8 stress_i_avx(stress[i]);
const Scalarf8 restVolume_i_avx(restVolumes[i]);
// Compute elastic force f^l_i in Eq. 30
Vector3f8 force_avx;
force_avx.setZero();
for (unsigned int j = 0; j < numNeighbors; j+=8)
{
const unsigned int count = std::min(numNeighbors - j, 8u);
unsigned int nIndices[8];
for (auto k = 0u; k < count; k++)
nIndices[k] = initial_to_current_index[initialNeighbors[i0][j + k]];
const Scalarf8 stress_j_avx = convert_zero(nIndices, &stress[0], count);
const Scalarf8 restVolume_j_avx = convert_zero(nIndices, &restVolumes[0], count);
const Vector3f8& V_RL_gradWi = precomp_RL_gradW8[precomputed_indices8[i] + j / 8];
const Vector3f8& V_RL_gradWj = precomp_RLj_gradW8[precomputed_indices8[i] + j / 8];
force_avx += (V_RL_gradWi * (restVolume_i_avx * stress_i_avx) + V_RL_gradWj * (restVolume_j_avx * stress_j_avx));
}
const Real factor = dt / model->getMass(i);
result[3 * i] = vec[3 * i] - factor * force_avx.x().reduce();
result[3 * i + 1] = vec[3 * i + 1] - factor * force_avx.y().reduce();
result[3 * i + 2] = vec[3 * i + 2] - factor * force_avx.z().reduce();
}
else
{
result[3 * i] = vec[3 * i];
result[3 * i + 1] = vec[3 * i + 1];
result[3 * i + 2] = vec[3 * i + 2];
}
}
}
}
void Elasticity_Kugelstadt2021::computeF(const unsigned int i, const Real* x, Elasticity_Kugelstadt2021 *e)
{
const unsigned int i0 = e->m_current_to_initial_index[i];
const Vector3r& vi = Eigen::Map<const Vector3r>(&x[3 * i], 3);
const unsigned int numNeighbors = (unsigned int)e->m_initialNeighbors[i0].size();
const Vector3f8 vi_avx(vi);
// compute corotated deformation gradient (Eq. 12)
Matrix3f8 F_avx;
F_avx.setZero();
// Fluid
for (unsigned int j = 0; j < numNeighbors; j += 8)
{
const unsigned int count = std::min(numNeighbors - j, 8u);
unsigned int nIndices[8];
for (auto k = 0u; k < count; k++)
nIndices[k] = e->m_initial_to_current_index[e->m_initialNeighbors[i0][j + k]];
const Vector3f8 vj_avx = convertVec_zero(nIndices, &x[0], count);
const Vector3f8 vj_vi = vj_avx - vi_avx;
const Vector3f8& correctedKernel = e->m_precomp_L_gradW8[e->m_precomputed_indices8[i] + j / 8];
Matrix3f8 dyad;
dyadicProduct(vj_vi, correctedKernel, dyad);
F_avx += dyad;
}
e->m_F[i] = F_avx.reduce();
if (Simulation::getCurrent()->is2DSimulation())
e->m_F[i](2, 2) = 1.0;
}
void Elasticity_Kugelstadt2021::computeRHS(VectorXr & rhs)
{
Simulation *sim = Simulation::getCurrent();
const unsigned int numParticles = m_model->numActiveParticles();
const unsigned int fluidModelIndex = m_model->getPointSetIndex();
FluidModel *model = m_model;
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
#pragma omp parallel default(shared)
{
// update the deformation gradient
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
if (model->getParticleState(i) == ParticleState::Active)
computeF(i, &m_model->getPosition(0)[0], this);
}
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
if (m_model->getParticleState(i) == ParticleState::Active)
{
// trace of R^T F - I
//const Matrix3r RTF = (m_rotations[i].transpose() * m_F[i]);
//const Real trace = RTF(0, 0) + RTF(1, 1) + RTF(2, 2) - 3.0;
// short form of: trace(R^T F - I)
const Real trace = m_rotations[i].row(0).dot(m_F[i].col(0)) +
m_rotations[i].row(1).dot(m_F[i].col(1)) +
m_rotations[i].row(2).dot(m_F[i].col(2)) - static_cast<Real>(3.0);
m_stress[i] = m_lambda * trace;
}
else
m_stress[i] = 0.0;
}
}
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
if (model->getParticleState(i) == ParticleState::Active)
{
const unsigned int i0 = m_current_to_initial_index[i];
const unsigned int numNeighbors = (unsigned int)m_initialNeighbors[i0].size();
const Scalarf8 stress_i_avx(m_stress[i]);
const Scalarf8 restVolume_i_avx(m_restVolumes[i]);
// Compute elastic force (Eq. 30)
Vector3f8 force_avx;
force_avx.setZero();
for (unsigned int j = 0; j < numNeighbors; j += 8)
{
const unsigned int count = std::min(numNeighbors - j, 8u);
unsigned int nIndices[8];
for (auto k = 0u; k < count; k++)
nIndices[k] = m_initial_to_current_index[m_initialNeighbors[i0][j + k]];
const Scalarf8 stress_j_avx = convert_zero(nIndices, &m_stress[0], count);
const Scalarf8 restVolume_j_avx = convert_zero(nIndices, &m_restVolumes[0], count);
const Vector3f8& correctedRotatedKernel_i = m_precomp_RL_gradW8[m_precomputed_indices8[i] + j / 8];
const Vector3f8& correctedRotatedKernel_j = m_precomp_RLj_gradW8[m_precomputed_indices8[i] + j / 8];
const Vector3f8 PWi = correctedRotatedKernel_i * stress_i_avx;
const Vector3f8 PWj = correctedRotatedKernel_j * stress_j_avx;
force_avx += (PWi * restVolume_i_avx + PWj * restVolume_j_avx);
}
Vector3r force;
force[0] = force_avx.x().reduce();
force[1] = force_avx.y().reduce();
force[2] = force_avx.z().reduce();
rhs.segment<3>(3 * i) = model->getVelocity(i) + dt * (1.0 / model->getMass(i) * force);
}
else
{
rhs.segment<3>(3 * i) = model->getVelocity(i);
}
}
}
}
#else
void Elasticity_Kugelstadt2021::computeRotations()
{
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
size_t numObjects = m_objects.size();
for (auto objIndex = 0; objIndex < numObjects; objIndex++)
{
ElasticObject* obj = m_objects[objIndex];
const std::vector<unsigned int>& group = obj->m_particleIndices;
int numParticles = (int)group.size();
auto& D = obj->m_factorization->m_D;
auto& f = obj->m_f;
auto& sol = obj->m_sol;
auto& quats = obj->m_quats;
// advect particles
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Vector3r& xi0 = m_model->getPosition0(i0);
const size_t numNeighbors = m_initialNeighbors[i0].size();
Vector3r xNew = m_model->getPosition(particleIndex);
xNew += dt * m_model->getVelocity(particleIndex);
sol[i] = xNew;
f[3 * i].setZero();
f[3 * i + 1].setZero();
f[3 * i + 2].setZero();
}
// compute sparse matrix-vector product in parallel
#pragma omp for schedule(static)
for (int k = 0; k < D.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(D, k); it; ++it)
{
f[it.row()] += it.value() * sol[it.col()];
}
}
// extract deformation gradient from values f
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Vector3r &x0 = f[3 * i];
const Vector3r &x1 = f[3 * i + 1];
const Vector3r &x2 = f[3 * i + 2];
m_F[particleIndex] << x0[0], x0[1], x0[2],
x1[0], x1[1], x1[2],
x2[0], x2[1], x2[2];
Quaternionr& q = quats[i];
MathFunctions::APD_Newton(m_F[particleIndex], q);
m_rotations[particleIndex] = q.matrix().transpose();
}
}
}
}
void Elasticity_Kugelstadt2021::stepElasticitySolver()
{
START_TIMING("Elasticity_Kugelstadt2021")
const unsigned int numActiveParticles = m_model->numActiveParticles();
if (numActiveParticles == 0)
return;
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
Simulation* sim = Simulation::getCurrent();
size_t numObjects = m_objects.size();
// solve the systems for each object separately
for (auto objIndex = 0; objIndex < numObjects; objIndex++)
{
ElasticObject* obj = m_objects[objIndex];
const std::vector<unsigned int>& group = obj->m_particleIndices;
int numParticles = (int)group.size();
auto& D = obj->m_factorization->m_D;
auto& DT_K = obj->m_factorization->m_DT_K;
auto& HT_K_H = obj->m_factorization->m_matHTH;
auto& RHS = obj->m_RHS;
auto& RHS_perm = obj->m_RHS_perm;
auto& permInd = obj->m_factorization->m_permInd;
auto& f = obj->m_f;
auto& quats = obj->m_quats;
auto& sol = obj->m_sol;
START_TIMING("advect x & Dx")
#pragma omp parallel default(shared)
{
// advect particles to get \tilde x in Eq. 29:
// store the 3 components of the advected positions in f_avx
// store the 3 components of dt*velocity in v_avx
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const size_t numNeighbors = m_initialNeighbors[i0].size();
const Real fdt = obj->m_factorization->m_dt;
const Vector3r x = m_model->getPosition(particleIndex);
const Vector3r dv = fdt * m_model->getVelocity(particleIndex);
const Vector3r xNew = x + dv;
sol[i] = xNew;
f[3 * i].setZero();
f[3 * i + 1].setZero();
f[3 * i + 2].setZero();
}
// compute deformation gradient F by sparse matrix-vector product in parallel:
//
// f = D * x_advected (Eq. 12 and Eq. 29)
#pragma omp for schedule(static)
for (int k = 0; k < D.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(D, k); it; ++it)
{
f[it.row()] += it.value() * sol[it.col()];
}
}
// extract deformation gradient from values f
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
const Vector3r &x0 = f[3 * i];
const Vector3r &x1 = f[3 * i + 1];
const Vector3r &x2 = f[3 * i + 2];
m_F[particleIndex] << x0[0], x0[1], x0[2],
x1[0], x1[1], x1[2],
x2[0], x2[1], x2[2];
Quaternionr& q = quats[i];
MathFunctions::APD_Newton(m_F[particleIndex], q);
m_rotations[particleIndex] = q.matrix();
f[3 * i] = m_rotations[particleIndex].row(0) - m_F[particleIndex].row(0);
f[3 * i + 1] = m_rotations[particleIndex].row(1) - m_F[particleIndex].row(1);
f[3 * i + 2] = m_rotations[particleIndex].row(2) - m_F[particleIndex].row(2);
if (i < RHS.size())
RHS[i].setZero();
}
}
STOP_TIMING_AVG;
// Compute right hand side
START_TIMING("rhs")
#pragma omp parallel default(shared)
{
// Compute D^T K * (R-F) (Eq. 29)
// Note: K already contains the factor: 2 dt^2
#pragma omp for schedule(static)
for (int k = 0; k < DT_K.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::RowMajor>::InnerIterator it(DT_K, k); it; ++it)
{
if (it.row() < (int) RHS.size())
RHS[it.row()] += it.value() * f[it.col()];
}
}
}
// If zero energy model control is turned on,
// add the following term to the right hand side (Eq. 29):
// H^T * K2 * H * x_advected
// Note: K2 already contains the factor: dt^2
if (m_alpha != 0.0)
{
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int k = 0; k < HT_K_H.outerSize(); ++k)
{
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::InnerIterator it(HT_K_H, k); it; ++it)
{
if (it.col() < (int)RHS.size())
RHS[it.col()] -= it.value() * sol[it.row()];
}
}
}
}
#pragma omp parallel default(shared)
{
//permutation of the RHS because of Eigen's fill-in reduction
#pragma omp for schedule(static)
for (int i = 0; i < (int)RHS.size(); i++)
RHS_perm[permInd[i]] = RHS[i];
}
STOP_TIMING_AVG;
}
// Solve linear system
START_TIMING("solve SLE")
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int objIndex = 0; objIndex < (int) numObjects; objIndex++)
{
ElasticObject* obj = m_objects[objIndex];
auto& RHS_perm = obj->m_RHS_perm;
auto& matL = obj->m_factorization->m_matL;
auto& matLT = obj->m_factorization->m_matLT;
for (int k = 0; k < matL.outerSize(); ++k)
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::InnerIterator it(matL, k); it; ++it)
if (it.row() == it.col())
RHS_perm[it.row()] /= it.value();
else
RHS_perm[it.row()] -= it.value() * RHS_perm[it.col()];
//backward substitution
for (int k = (int) matLT.outerSize() - 1; k >= 0; --k)
for (Eigen::SparseMatrix<Real, Eigen::ColMajor>::ReverseInnerIterator it(matLT, k); it; --it)
if (it.row() == it.col())
RHS_perm[it.row()] /= it.value();
else
RHS_perm[it.row()] -= it.value() * RHS_perm[it.col()];
}
}
for (auto objIndex = 0; objIndex < numObjects; objIndex++)
{
ElasticObject* obj = m_objects[objIndex];
const std::vector<unsigned int>& group = obj->m_particleIndices;
auto& RHS = obj->m_RHS;
auto& RHS_perm = obj->m_RHS_perm;
auto& permInvInd = obj->m_factorization->m_permInvInd;
#pragma omp parallel default(shared)
{
//invert permutation
#pragma omp for schedule(static)
for (int i = 0; i < (int)RHS.size(); i++)
RHS[permInvInd[i]] = RHS_perm[i];
#pragma omp for schedule(static)
for (int i = 0; i < (int)RHS.size(); i++)
{
const unsigned int i0 = group[i];
const unsigned int particleIndex = m_initial_to_current_index[i0];
if (m_model->getParticleState(particleIndex) == ParticleState::Active)
{
Vector3r& vi = m_model->getVelocity(particleIndex);
const Vector3r &dx = RHS[i];
const Real fdt = obj->m_factorization->m_dt;
vi += (1.0 / fdt) * dx;
}
}
}
}
STOP_TIMING_AVG
STOP_TIMING_AVG
}
void Elasticity_Kugelstadt2021::matrixVecProd(const Real* vec, Real *result, void *userData)
{
Simulation *sim = Simulation::getCurrent();
Elasticity_Kugelstadt2021* elasticity = static_cast<Elasticity_Kugelstadt2021*>(userData);
FluidModel *model = elasticity->getModel();
const unsigned int numParticles = model->numActiveParticles();
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
const auto ¤t_to_initial_index = elasticity->m_current_to_initial_index;
const auto &initial_to_current_index = elasticity->m_initial_to_current_index;
const auto &initialNeighbors = elasticity->m_initialNeighbors;
const auto &restVolumes = elasticity->m_restVolumes;
auto &stress = elasticity->m_stress;
const auto &precomp_RL_gradW = elasticity->m_precomp_RL_gradW;
const auto& precomp_RLj_gradW = elasticity->m_precomp_RLj_gradW;
const auto &precomputed_indices = elasticity->m_precomputed_indices;
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
if (model->getParticleState(i) == ParticleState::Active)
{
const unsigned int i0 = current_to_initial_index[i];
const Vector3r &pi = Eigen::Map<const Vector3r>(&vec[3 * i], 3);
const unsigned int numNeighbors = (unsigned int) initialNeighbors[i0].size();
// compute corotated deformation gradient
// Fluid
Real trace = 0.0;
for (unsigned int j = 0; j < numNeighbors; j++)
{
const unsigned int neighborIndex = initial_to_current_index[initialNeighbors[i0][j]];
// get initial neighbor index considering the current particle order
const unsigned int neighborIndex0 = initialNeighbors[i0][j];
const Vector3r &pj = Eigen::Map<const Vector3r>(&vec[3 * neighborIndex], 3);
const Vector3r pj_pi = pj - pi;
const Vector3r correctedRotatedKernel = precomp_RL_gradW[precomputed_indices[i] + j];
// We need the trace of the strain tensor. Therefore, we are only interested in the
// diagonal elements which are F_ii-1. However, instead of computing the trace
// of a dyadic product, we can determine the dot product of the vectors to get the trace
// of F. Then the trace of the strain is the result minus 3.
trace += pj_pi.dot(correctedRotatedKernel);
}
trace *= dt;
// First Piola Kirchhoff stress = 2 mu epsilon + lambda trace(epsilon) I
stress[i] = elasticity->m_lambda*trace;
}
else
stress[i] = 0.0;
}
}
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
if (model->getParticleState(i) == ParticleState::Active)
{
const unsigned int i0 = current_to_initial_index[i];
const size_t numNeighbors = initialNeighbors[i0].size();
// Compute elastic force
Vector3r force;
force.setZero();
for (unsigned int j = 0; j < numNeighbors; j++)
{
const unsigned int neighborIndex = initial_to_current_index[initialNeighbors[i0][j]];
// get initial neighbor index considering the current particle order
const unsigned int neighborIndex0 = initialNeighbors[i0][j];
const Vector3r correctedRotatedKernel_i = precomp_RL_gradW[precomputed_indices[i] + j];
const Vector3r correctedRotatedKernel_j = -precomp_RLj_gradW[precomputed_indices[i] + j];
const Vector3r PWi = stress[i] * correctedRotatedKernel_i;
const Vector3r PWj = stress[neighborIndex] * correctedRotatedKernel_j;
force += (restVolumes[i] * PWi - restVolumes[neighborIndex] * PWj);
}
const Real factor = dt / model->getMass(i);
result[3 * i] = vec[3 * i] - factor * force[0];
result[3 * i + 1] = vec[3 * i + 1] - factor * force[1];
result[3 * i + 2] = vec[3 * i + 2] - factor * force[2];
}
else
{
result[3 * i] = vec[3 * i];
result[3 * i + 1] = vec[3 * i + 1];
result[3 * i + 2] = vec[3 * i + 2];
}
}
}
}
void Elasticity_Kugelstadt2021::computeRHS(VectorXr & rhs)
{
Simulation *sim = Simulation::getCurrent();
const unsigned int numParticles = m_model->numActiveParticles();
const unsigned int fluidModelIndex = m_model->getPointSetIndex();
FluidModel *model = m_model;
const Real dt = TimeManager::getCurrent()->getTimeStepSize();
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
m_F[i].setZero();
if (m_model->getParticleState(i) == ParticleState::Active)
{
// compute corotated deformation gradient (Eq. 12)
const unsigned int i0 = m_current_to_initial_index[i];
const Vector3r &xi = m_model->getPosition(i);
const size_t numNeighbors = m_initialNeighbors[i0].size();
// Fluid
for (unsigned int j = 0; j < numNeighbors; j++)
{
const unsigned int neighborIndex = m_initial_to_current_index[m_initialNeighbors[i0][j]];
const Vector3r &xj = model->getPosition(neighborIndex);
const Vector3r xj_xi = xj - xi;
const Vector3r correctedKernel = m_precomp_L_gradW[m_precomputed_indices[i] + j];
m_F[i] += xj_xi * correctedKernel.transpose();
}
if (sim->is2DSimulation())
m_F[i](2, 2) = 1.0;
// short form of: trace(R^T F - I)
const Real trace = m_rotations[i].col(0).dot(m_F[i].col(0)) +
m_rotations[i].col(1).dot(m_F[i].col(1)) +
m_rotations[i].col(2).dot(m_F[i].col(2)) - static_cast<Real>(3.0);
m_stress[i] = m_lambda*trace;
}
else
m_stress[i] = 0.0;
}
}
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
if (model->getParticleState(i) == ParticleState::Active)
{
const unsigned int i0 = m_current_to_initial_index[i];
const Vector3r& xi0 = m_model->getPosition0(i0);
const size_t numNeighbors = m_initialNeighbors[i0].size();
// Compute elastic force
Vector3r force;
force.setZero();
for (unsigned int j = 0; j < numNeighbors; j++)
{
const unsigned int neighborIndex = m_initial_to_current_index[m_initialNeighbors[i0][j]];
const Vector3r correctedRotatedKernel_i = m_precomp_RL_gradW[m_precomputed_indices[i] + j];
const Vector3r correctedRotatedKernel_j = m_precomp_RLj_gradW[m_precomputed_indices[i] + j];
const Vector3r PWi = m_stress[i] * correctedRotatedKernel_i;
const Vector3r PWj = m_stress[neighborIndex] * correctedRotatedKernel_j;
force += (PWi * m_restVolumes[i] + PWj * m_restVolumes[neighborIndex]);
}
rhs.segment<3>(3 * i) = model->getVelocity(i) + dt * (1.0 / model->getMass(i) * force);
}
else
{
rhs.segment<3>(3 * i) = model->getVelocity(i);
}
}
}
}
#endif
void Elasticity_Kugelstadt2021::precomputeValues()
{
Simulation* sim = Simulation::getCurrent();
const unsigned int fluidModelIndex = m_model->getPointSetIndex();
const int numParticles = (int)m_model->numActiveParticles();
#ifdef USE_AVX
m_precomputed_indices8.clear();
m_precomp_L_gradW8.clear();
m_precomp_RL_gradW8.clear();
m_precomp_RLj_gradW8.clear();
m_precomputed_indices8.resize(numParticles);
#else
m_precomputed_indices.clear();
m_precomp_L_gradW.clear();
m_precomp_RL_gradW.clear();
m_precomp_RLj_gradW.clear();
m_precomputed_indices.resize(numParticles);
#endif
unsigned int sumNeighborParticles = 0;
unsigned int sumNeighborParticles8 = 0;
for (int i = 0; i < numParticles; i++)
{
const unsigned int i0 = m_current_to_initial_index[i];
const size_t numNeighbors = m_initialNeighbors[i0].size();
#ifdef USE_AVX
m_precomputed_indices8[i] = sumNeighborParticles8;
// steps of 8 values due to avx
sumNeighborParticles8 += (unsigned int) numNeighbors / 8u;
if (numNeighbors % 8 != 0)
sumNeighborParticles8++;
#else
m_precomputed_indices[i] = sumNeighborParticles;
sumNeighborParticles += numNeighbors;
#endif
}
#ifdef USE_AVX
m_precomp_L_gradW8.resize(sumNeighborParticles8);
m_precomp_RL_gradW8.resize(sumNeighborParticles8);
m_precomp_RLj_gradW8.resize(sumNeighborParticles8);
#else
m_precomp_L_gradW.resize(sumNeighborParticles);
m_precomp_RL_gradW.resize(sumNeighborParticles);
m_precomp_RLj_gradW.resize(sumNeighborParticles);
#endif
#pragma omp parallel default(shared)
{
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
m_RL[i] = m_rotations[i].transpose() * m_L[i];
}
#pragma omp for schedule(static)
for (int i = 0; i < (int)numParticles; i++)
{
const unsigned int i0 = m_current_to_initial_index[i];
const Vector3r& xi0 = m_model->getPosition0(i0);
const unsigned int numNeighbors = (unsigned int)m_initialNeighbors[i0].size();
#ifdef USE_AVX
const Vector3f8 xi0_avx(xi0);
Scalarf8 restVolume_i_avx(m_restVolumes[i]);
unsigned int base8 = m_precomputed_indices8[i];
unsigned int idx = 0;
Matrix3f8 L_avx(m_L[i]);
Matrix3f8 RL_avx(m_RL[i]);
for (unsigned int j = 0; j < numNeighbors; j += 8)
{
const unsigned int count = std::min(numNeighbors - j, 8u);
unsigned int nIndices[8];
for (auto k = 0u; k < count; k++)
nIndices[k] = m_initial_to_current_index[m_initialNeighbors[i0][j + k]];
const Scalarf8 restVolume_j_avx = convert_zero(nIndices, &m_restVolumes[0], count);
const Vector3f8 xj0_avx = convertVec_zero(&m_initialNeighbors[i0][j], &m_model->getPosition0(0), count);
const Matrix3f8 RLj_avx = convertMat_zero(nIndices, &m_RL[0], count);
const Vector3f8 gradW = CubicKernel_AVX::gradW(xi0_avx - xj0_avx);
m_precomp_L_gradW8[base8 + idx] = L_avx * gradW * restVolume_j_avx;
m_precomp_RL_gradW8[base8 + idx] = RL_avx * gradW * restVolume_j_avx;
m_precomp_RLj_gradW8[base8 + idx] = RLj_avx * gradW * restVolume_i_avx;
idx++;
}
#else
unsigned int base = m_precomputed_indices[i];
for (unsigned int j = 0; j < numNeighbors; j++)
{
// get initial neighbor index considering the current particle order
const unsigned int neighborIndex0 = m_initialNeighbors[i0][j];
const unsigned int neighborIndex = m_initial_to_current_index[m_initialNeighbors[i0][j]];
const Vector3r& xj0 = m_model->getPosition0(neighborIndex0);
Vector3r gradW = sim->gradW(xi0 - xj0);
m_precomp_L_gradW[base + j] = m_restVolumes[neighborIndex] * m_L[i] * gradW;
m_precomp_RL_gradW[base + j] = m_restVolumes[neighborIndex] * m_RL[i] * gradW;
m_precomp_RLj_gradW[base + j] = m_restVolumes[i] * m_RL[neighborIndex] * gradW;
}
#endif
}
}
}