.. _program_listing_file_SPlisHSPlasH_Elasticity_Elasticity_Kugelstadt2021.cpp: Program Listing for File Elasticity_Kugelstadt2021.cpp ====================================================== |exhale_lsh| :ref:`Return to documentation for file ` (``SPlisHSPlasH/Elasticity/Elasticity_Kugelstadt2021.cpp``) .. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS .. code-block:: cpp #include "Elasticity_Kugelstadt2021.h" #include "SPlisHSPlasH/Simulation.h" #include "SPlisHSPlasH/Utilities/MathFunctions.h" #include "SPlisHSPlasH/TimeManager.h" #include "Utilities/Timing.h" #include "Utilities/Counting.h" #include #include "Utilities/BinaryFileReaderWriter.h" #include "Utilities/StringTools.h" #include #include using namespace SPH; using namespace GenParam; std::string Elasticity_Kugelstadt2021::METHOD_NAME = "Kugelstadt et al. 2021"; int Elasticity_Kugelstadt2021::YOUNGS_MODULUS = -1; int Elasticity_Kugelstadt2021::POISSON_RATIO = -1; int Elasticity_Kugelstadt2021::FIXED_BOX_MIN = -1; int Elasticity_Kugelstadt2021::FIXED_BOX_MAX = -1; int Elasticity_Kugelstadt2021::ITERATIONS_V = -1; int Elasticity_Kugelstadt2021::MAX_ITERATIONS_V = -1; int Elasticity_Kugelstadt2021::MAX_ERROR_V = -1; int Elasticity_Kugelstadt2021::ALPHA = -1; int Elasticity_Kugelstadt2021::MAX_NEIGHBORS = -1; Elasticity_Kugelstadt2021::Elasticity_Kugelstadt2021(FluidModel *model) : NonPressureForceBase(model) { const unsigned int numParticles = model->numActiveParticles(); m_restVolumes.resize(numParticles); m_current_to_initial_index.resize(numParticles); m_initial_to_current_index.resize(numParticles); m_initialNeighbors.resize(numParticles); m_rotations.resize(numParticles, Matrix3r::Identity()); m_stress.resize(numParticles); m_L.resize(numParticles); // kernel gradient correction matrix L m_F.resize(numParticles); // deformation gradient m_RL.resize(numParticles); // stores the rotation matrix times the matrix L m_vDiff.resize(numParticles, Vector3r::Zero()); // velocity difference used for the warm start of the volume cg solver m_youngsModulus = static_cast(100000.0); m_poissonRatio = static_cast(0.3); m_iterationsV = 0; m_maxIterV = 100; m_maxErrorV = static_cast(1.0e-4); m_alpha = 0.0; m_youngsModulus = 5000000; m_maxNeighbors = -1; m_fixedBoxMin.setZero(); m_fixedBoxMax.setZero(); model->addField({ "rest volume", METHOD_NAME, FieldType::Scalar, [&](const unsigned int i) -> Real* { return &m_restVolumes[i]; }, true }); model->addField({ "rotation", METHOD_NAME, FieldType::Matrix3, [&](const unsigned int i) -> Real* { return &m_rotations[i](0,0); } }); model->addField({ "stress", METHOD_NAME, FieldType::Scalar, [&](const unsigned int i) -> Real* { return &m_stress[i]; } }); model->addField({ "deformation gradient", METHOD_NAME, FieldType::Matrix3, [&](const unsigned int i) -> Real* { return &m_F[i](0,0); } }); model->addField({ "correction matrix", METHOD_NAME, FieldType::Matrix3, [&](const unsigned int i) -> Real* { return &m_L[i](0,0); } }); } Elasticity_Kugelstadt2021::~Elasticity_Kugelstadt2021(void) { m_model->removeFieldByName("rest volume"); m_model->removeFieldByName("rotation"); m_model->removeFieldByName("stress"); m_model->removeFieldByName("deformation gradient"); m_model->removeFieldByName("correction matrix"); for (auto objIndex = 0; objIndex < m_objects.size(); objIndex++) { delete m_objects[objIndex]; } m_objects.clear(); } void Elasticity_Kugelstadt2021::deferredInit() { initValues(); } void Elasticity_Kugelstadt2021::initParameters() { NonPressureForceBase::initParameters(); ParameterBase::GetFunc getFctYM = [&]()-> Real { return m_youngsModulus; }; ParameterBase::SetFunc setFctYM = [&](Real val) { m_youngsModulus = val; m_mu = m_youngsModulus / (static_cast(2.0) * (static_cast(1.0) + m_poissonRatio)); m_lambda = m_youngsModulus * m_poissonRatio / ((static_cast(1.0) + m_poissonRatio) * (static_cast(1.0) - static_cast(2.0) * m_poissonRatio)); if (Simulation::getCurrent()->isSimulationInitialized()) // if Young's modulus has changed, recompute the factorization Simulation::getCurrent()->reset(); }; YOUNGS_MODULUS = createNumericParameter("youngsModulus", "Young`s modulus", getFctYM, setFctYM); setGroup(YOUNGS_MODULUS, "Fluid Model|Elasticity"); setDescription(YOUNGS_MODULUS, "Stiffness of the elastic material"); RealParameter* rparam = static_cast(getParameter(YOUNGS_MODULUS)); rparam->setMinValue(0.0); ParameterBase::GetFunc getFctPR = [&]()-> Real { return m_poissonRatio; }; ParameterBase::SetFunc setFctPR = [&](Real val) { m_poissonRatio = val; m_mu = m_youngsModulus / (static_cast(2.0) * (static_cast(1.0) + m_poissonRatio)); m_lambda = m_youngsModulus * m_poissonRatio / ((static_cast(1.0) + m_poissonRatio) * (static_cast(1.0) - static_cast(2.0) * m_poissonRatio)); if (Simulation::getCurrent()->isSimulationInitialized()) // if Poisson ration has changed, recompute the factorization Simulation::getCurrent()->reset(); }; POISSON_RATIO = createNumericParameter("poissonsRatio", "Poisson`s ratio", getFctPR, setFctPR); setGroup(POISSON_RATIO, "Fluid Model|Elasticity"); setDescription(POISSON_RATIO, "Ratio of transversal expansion and axial compression"); rparam = static_cast(getParameter(POISSON_RATIO)); rparam->setMinValue(static_cast(-1.0 + 1e-4)); rparam->setMaxValue(static_cast(0.5 - 1e-4)); ITERATIONS_V = createNumericParameter("volumeIterations", "Iterations", &m_iterationsV); setGroup(ITERATIONS_V, "Fluid Model|Elasticity"); setDescription(ITERATIONS_V, "Iterations required by the volume solver."); getParameter(ITERATIONS_V)->setReadOnly(true); MAX_ITERATIONS_V = createNumericParameter("volumeMaxIter", "Max. iterations (volume solver)", &m_maxIterV); setGroup(MAX_ITERATIONS_V, "Fluid Model|Elasticity"); setDescription(MAX_ITERATIONS_V, "Max. iterations of the volume solver"); static_cast*>(getParameter(MAX_ITERATIONS_V))->setMinValue(0); MAX_ERROR_V = createNumericParameter("volumeMaxError", "Max. volume error", &m_maxErrorV); setGroup(MAX_ERROR_V, "Fluid Model|Elasticity"); setDescription(MAX_ERROR_V, "Max. error of the volume solver"); rparam = static_cast(getParameter(MAX_ERROR_V)); rparam->setMinValue(1e-9); ParameterBase::GetFunc getFctAlpha = [&]()-> Real { return m_alpha; }; ParameterBase::SetFunc setFctAlpha = [&](Real val) { m_alpha = val; if (Simulation::getCurrent()->isSimulationInitialized()) // if value has changed, recompute the factorization Simulation::getCurrent()->reset(); }; ALPHA = createNumericParameter("alpha", "Zero-energy modes suppression", getFctAlpha, setFctAlpha); setGroup(ALPHA, "Fluid Model|Elasticity"); setDescription(ALPHA, "Coefficent for zero-energy modes suppression method"); rparam = static_cast(getParameter(ALPHA)); rparam->setMinValue(0.0); ParameterBase::GetFunc getFct5 = [&]()-> int { return m_maxNeighbors; }; ParameterBase::SetFunc setFct5 = [&](int val) { m_maxNeighbors = val; if (Simulation::getCurrent()->isSimulationInitialized()) // if value has changed, recompute the factorization Simulation::getCurrent()->reset(); }; MAX_NEIGHBORS = createNumericParameter("maxNeighbors", "Max. neighbors", getFct5, setFct5); setGroup(MAX_NEIGHBORS, "Fluid Model|Elasticity"); setDescription(MAX_NEIGHBORS, "Maximum number of neighbors that are considered."); ParameterBase::GetVecFunc getFctFMin = [&]()-> Real* { return m_fixedBoxMin.data(); }; ParameterBase::SetVecFunc setFctFMin = [&](Real* val) { m_fixedBoxMin = Vector3r(val[0], val[1], val[2]); determineFixedParticles(); }; FIXED_BOX_MIN = createVectorParameter("fixedBoxMin", "Fixed box min", 3u, getFctFMin, setFctFMin); setGroup(FIXED_BOX_MIN, "Fluid Model|Elasticity"); setDescription(FIXED_BOX_MIN, "Minimum point of box of which contains the fixed particles."); getParameter(FIXED_BOX_MIN)->setReadOnly(true); ParameterBase::GetVecFunc getFctFMax = [&]()-> Real* { return m_fixedBoxMax.data(); }; ParameterBase::SetVecFunc setFctFMax = [&](Real* val) { m_fixedBoxMax = Vector3r(val[0], val[1], val[2]); determineFixedParticles(); }; FIXED_BOX_MAX = createVectorParameter("fixedBoxMax", "Fixed box max", 3u, getFctFMax, setFctFMax); setGroup(FIXED_BOX_MAX, "Fluid Model|Elasticity"); setDescription(FIXED_BOX_MAX, "Maximum point of box of which contains the fixed particles."); getParameter(FIXED_BOX_MAX)->setReadOnly(true); } void Elasticity_Kugelstadt2021::determineFixedParticles() { const unsigned int numParticles = m_model->numActiveParticles(); if (!m_fixedBoxMin.isZero() || !m_fixedBoxMax.isZero()) { for (int i = 0; i < (int)numParticles; i++) { const Vector3r& x = m_model->getPosition0(i); if ((x[0] > m_fixedBoxMin[0]) && (x[1] > m_fixedBoxMin[1]) && (x[2] > m_fixedBoxMin[2]) && (x[0] < m_fixedBoxMax[0]) && (x[1] < m_fixedBoxMax[1]) && (x[2] < m_fixedBoxMax[2])) { m_model->setParticleState(i, ParticleState::Fixed); } } } } std::string Elasticity_Kugelstadt2021::computeMD5(const unsigned int objIndex) { ElasticObject* obj = m_objects[objIndex]; auto& group = obj->m_particleIndices; auto numParticles = group.size(); std::vector tempN; tempN.resize(obj->m_particleIndices.size() * 2); for (size_t i = 0; i < numParticles; i++) { const unsigned int particleIndex = group[i]; const size_t numNeighbors = m_initialNeighbors[particleIndex].size(); tempN[2 * i] = static_cast(numNeighbors); tempN[2 * i + 1] = 0; for (auto j = 0; j < numNeighbors; j++) tempN[2 * i + 1] += m_initialNeighbors[particleIndex][j] - group[0]; } // compute MD5 checksum for all particle positions, this is used for the cache file MD5 context((unsigned char*)&tempN[0], static_cast(2 * numParticles * sizeof(unsigned int))); char* md5hex = context.hex_digest(); tempN.clear(); return std::string(md5hex); } void Elasticity_Kugelstadt2021::initValues() { Simulation *sim = Simulation::getCurrent(); sim->getNeighborhoodSearch()->find_neighbors(); FluidModel *model = m_model; const unsigned int numParticles = model->numActiveParticles(); const unsigned int fluidModelIndex = model->getPointSetIndex(); m_totalNeighbors = 0u; // Store the neighbors in the reference configurations and // compute the volume of each particle in rest state #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { m_current_to_initial_index[i] = i; m_initial_to_current_index[i] = i; // reset particle state if (m_model->getParticleState(i) == ParticleState::Fixed) m_model->setParticleState(i, ParticleState::Active); // only neighbors in same phase will influence elasticity unsigned int numNeighbors = sim->numberOfNeighbors(fluidModelIndex, fluidModelIndex, i); m_initialNeighbors[i].resize(numNeighbors); for (unsigned int j = 0; j < numNeighbors; j++) m_initialNeighbors[i][j] = sim->getNeighbor(fluidModelIndex, fluidModelIndex, i, j); // if maxNeighbors is set, then sort all neighbors wrt. to their distance to xi // and only take the maxNeighbors next ones. if (m_maxNeighbors > 0) { struct Comparator { Comparator(const Vector3r& xi, Vector3r* x) : m_xi(xi), m_x(x) {}; bool operator()(unsigned int a, unsigned int b) { return (m_x[a] - m_xi).squaredNorm() < (m_x[b] - m_xi).squaredNorm(); } Vector3r m_xi; Vector3r* m_x; }; // sort the neighbors according to their distance std::sort(m_initialNeighbors[i].begin(), m_initialNeighbors[i].end(), Comparator(model->getPosition0(i), &model->getPosition0(0))); // take only the next maxNeighbors if (m_initialNeighbors[i].size() > m_maxNeighbors) { numNeighbors = m_maxNeighbors; m_initialNeighbors[i].resize(m_maxNeighbors); } } m_totalNeighbors += numNeighbors; // compute rest volume Real density = model->getMass(i) * sim->W_zero(); const Vector3r &xi0 = model->getPosition0(i); for (size_t j = 0; j < m_initialNeighbors[i].size(); j++) { const unsigned int neighborIndex0 = m_initialNeighbors[i][j]; const Vector3r& xj0 = model->getPosition0(neighborIndex0); density += model->getMass(neighborIndex0) * sim->W(xi0 - xj0); } m_restVolumes[i] = model->getMass(i) / density; m_rotations[i].setIdentity(); m_stress[i] = 0.0; m_F[i].setIdentity(); m_vDiff[i].setZero(); m_RL[i].setIdentity(); } } // mark all particles in the bounding box as fixed determineFixedParticles(); // find separate objects START_TIMING("findObjects") findObjects(); STOP_TIMING_AVG; // if we find the same object, copy the neighborhood info size_t numObjects = m_objects.size(); auto& fluidInfos = sim->getFluidInfos(); for (auto objIndex = 1; objIndex < numObjects; objIndex++) { bool foundSameObj = false; int objIndex2; for (objIndex2 = 0; objIndex2 < objIndex; objIndex2++) { if (fluidInfos[objIndex].hasSameParticleSampling(fluidInfos[objIndex2])) { foundSameObj = true; break; } } if (foundSameObj) { ElasticObject* obj = m_objects[objIndex]; ElasticObject* obj0 = m_objects[objIndex2]; const std::vector& group = obj->m_particleIndices; const std::vector& group0 = obj0->m_particleIndices; int numParticles = (int)group.size(); int offset = group[0]; int offset0 = group0[0]; for (int i = 0; i < (int)numParticles; i++) { int particleIndex = group[i]; int particleIndex0 = group0[i]; const unsigned int i0 = m_current_to_initial_index[particleIndex]; const unsigned int i00 = m_current_to_initial_index[particleIndex0]; const size_t numNeighbors = m_initialNeighbors[i00].size(); m_initialNeighbors[i0].resize(numNeighbors); for (int j = 0; j < numNeighbors; j++) { m_initialNeighbors[i0][j] = m_initialNeighbors[i00][j] - offset0 + offset; } } } } // compute kernel gradient correction matrix START_TIMING("computeMatrixL") computeMatrixL(); STOP_TIMING_AVG; // init factorization START_TIMING("initSystem") initSystem(); STOP_TIMING_AVG; } void Elasticity_Kugelstadt2021::findObjects() { Simulation *sim = Simulation::getCurrent(); const unsigned int numParticles = m_model->numActiveParticles(); const unsigned int fluidModelIndex = m_model->getPointSetIndex(); FluidModel *model = m_model; m_objects.clear(); std::map obj2group; for (unsigned int i = 0; i < numParticles; i++) { const unsigned int objId = m_model->getObjectId(i); // search for object id in map if (obj2group.find(objId) == obj2group.end()) { // if id was not found, then we have a new object m_objects.push_back(new ElasticObject()); const unsigned int groupIndex = (unsigned int)(m_objects.size()-1); obj2group[objId] = groupIndex; const unsigned int i0 = m_current_to_initial_index[i]; m_objects[groupIndex]->m_particleIndices.push_back(i0); } else { // object already exists const unsigned int groupIndex = obj2group[objId]; const unsigned int i0 = m_current_to_initial_index[i]; m_objects[groupIndex]->m_particleIndices.push_back(i0); } } // For each object sort the particles so that all fixed particles are at the end of the list. // This is needed for the factorization to exclude fixed particles. for (size_t groupIndex = 0; groupIndex < m_objects.size(); groupIndex++) { struct Comparator { Comparator(Elasticity_Kugelstadt2021* _this) : m_this(_this) {}; bool operator()(unsigned int a, unsigned int b) { if ((m_this->m_model->getParticleState(a) != ParticleState::Active) && (m_this->m_model->getParticleState(b) == ParticleState::Active)) return false; else if ((m_this->m_model->getParticleState(a) == ParticleState::Active) && (m_this->m_model->getParticleState(b) != ParticleState::Active)) return true; else if ((m_this->m_model->getParticleState(a) != ParticleState::Active) && (m_this->m_model->getParticleState(b) != ParticleState::Active)) return a < b; else return a < b; } Elasticity_Kugelstadt2021* m_this; }; std::sort(m_objects[groupIndex]->m_particleIndices.begin(), m_objects[groupIndex]->m_particleIndices.end(), Comparator(this)); m_objects[groupIndex]->m_nFixed = 0; for (size_t i = 0; i < m_objects[groupIndex]->m_particleIndices.size(); i++) { if (m_model->getParticleState(m_objects[groupIndex]->m_particleIndices[i]) != ParticleState::Active) m_objects[groupIndex]->m_nFixed++; } LOG_INFO << "Object " << groupIndex << " - fixed particles: " << m_objects[groupIndex]->m_nFixed; } } void Elasticity_Kugelstadt2021::initSystem() { Simulation *sim = Simulation::getCurrent(); const unsigned int fluidModelIndex = m_model->getPointSetIndex(); const Real dt = TimeManager::getCurrent()->getTimeStepSize(); // Compute Lamé parameters m_mu = m_youngsModulus / (static_cast(2.0) * (static_cast(1.0) + m_poissonRatio)); m_lambda = m_youngsModulus * m_poissonRatio / ((static_cast(1.0) + m_poissonRatio) * (static_cast(1.0) - static_cast(2.0) * m_poissonRatio)); FluidModel *model = m_model; size_t numObjects = m_objects.size(); auto& fluidInfos = sim->getFluidInfos(); for (auto objIndex = 0; objIndex < numObjects; objIndex++) { ElasticObject* obj = m_objects[objIndex]; // compute MD5 check sum std::string md5 = computeMD5(objIndex); // check if object with same md5 already exists bool foundFactorization = false; for (size_t i = 0; i < objIndex; i++) { // reuse the factorization for all objects with the same particle sampling to reduce the memory consumption if (fluidInfos[objIndex].hasSameParticleSampling(fluidInfos[i])) { m_objects[objIndex]->m_factorization = m_objects[i]->m_factorization; foundFactorization = true; LOG_INFO << "Object " << objIndex << " is using the factorization of object " << i; break; } } // if no factorization was found, create a new one if (obj->m_factorization == nullptr) obj->m_factorization = std::make_shared(); // generate file name for cache file std::string baseName = Utilities::FileSystem::getFileName(fluidInfos[objIndex].samplesFile); std::string ext = Utilities::FileSystem::getFileExt(fluidInfos[objIndex].samplesFile); std::string cacheFileName = sim->getCachePath() + "/" + baseName + "_" + ext + "_" + std::to_string(fluidInfos[objIndex].mode) + "_" + md5 + "_" + Utilities::StringTools::real2String(dt) + "_" + Utilities::StringTools::real2String(m_mu) + "_" + Utilities::StringTools::real2String(m_alpha) + ".bin"; // Fluid block if (fluidInfos[objIndex].type == 0) { Vector3r diag = (fluidInfos[objIndex].box.max() - fluidInfos[objIndex].box.min()); cacheFileName = sim->getCachePath() + "/Block_" + Utilities::StringTools::real2String(diag.x()) + "_" + Utilities::StringTools::real2String(diag.y()) + "_" + Utilities::StringTools::real2String(diag.z()) + "_" + std::to_string(fluidInfos[objIndex].mode) + "_" + md5 + "_" + Utilities::StringTools::real2String(dt) + "_" + Utilities::StringTools::real2String(m_mu) + "_" + Utilities::StringTools::real2String(m_alpha) + ".bin"; } // check if cache file exists const bool foundCacheFile = Utilities::FileSystem::fileExists(cacheFileName); if (sim->getUseCache() && foundCacheFile) { // if factorization cannot be reused from another object and a cache file was found, load the cache file if (!foundFactorization) { LOG_INFO << "Read cached factorization: " << cacheFileName; BinaryFileReader binReader; binReader.openFile(cacheFileName); binReader.readSparseMatrix(obj->m_factorization->m_D); binReader.readSparseMatrix(obj->m_factorization->m_DT_K); binReader.read(obj->m_factorization->m_dt); binReader.read(obj->m_factorization->m_mu); binReader.readSparseMatrix(obj->m_factorization->m_matHTH); #ifdef USE_AVX delete obj->m_factorization->m_cholesky; obj->m_factorization->m_cholesky = new CholeskyAVXSolver(); obj->m_factorization->m_cholesky->load(binReader); #else binReader.readSparseMatrix(obj->m_factorization->m_matL); binReader.readSparseMatrix(obj->m_factorization->m_matLT); binReader.readMatrixX(obj->m_factorization->m_permInd); binReader.readMatrixX(obj->m_factorization->m_permInvInd); #endif binReader.closeFile(); } // init vectors int numParticles = (int)obj->m_particleIndices.size(); obj->m_RHS.resize(numParticles - obj->m_nFixed); #ifdef USE_AVX obj->m_rhs.resize(3 * numParticles - obj->m_nFixed); obj->m_sol.resize(3 * numParticles - obj->m_nFixed); obj->m_f_avx.resize(3 * numParticles); obj->m_sol_avx.resize(numParticles); int vecSize; if (numParticles % 8 == 0) vecSize = numParticles / 8; else vecSize = numParticles / 8 + 1; obj->m_quats_avx.resize(vecSize); for (int i = 0; i < vecSize; i++) obj->m_quats_avx[i] = Quaternion8f(); #else obj->m_f.resize(3 * numParticles); obj->m_sol.resize(numParticles); obj->m_quats.resize(numParticles, Quaternionr::Identity()); obj->m_RHS_perm.resize(numParticles - obj->m_nFixed); #endif } else // no cache found { ElasticObject* obj = m_objects[objIndex]; // compute new factorization if no factorization can be reused if (!foundFactorization) initFactorization(obj->m_factorization, obj->m_particleIndices, obj->m_nFixed, dt, m_mu); // init vectors int numParticles = (int)obj->m_particleIndices.size(); obj->m_RHS.resize(numParticles - obj->m_nFixed); #ifdef USE_AVX obj->m_rhs.resize(3 * numParticles - obj->m_nFixed); obj->m_sol.resize(3 * numParticles - obj->m_nFixed); obj->m_f_avx.resize(3 * numParticles); obj->m_sol_avx.resize(numParticles); int vecSize; if (numParticles % 8 == 0) vecSize = numParticles / 8; else vecSize = numParticles / 8 + 1; obj->m_quats_avx.resize(vecSize); for (int i = 0; i < vecSize; i++) obj->m_quats_avx[i] = Quaternion8f(); #else obj->m_f.resize(3 * numParticles); obj->m_sol.resize(numParticles); obj->m_quats.resize(numParticles, Quaternionr::Identity()); obj->m_RHS_perm.resize(numParticles - obj->m_nFixed); #endif // write cache file if (sim->getUseCache() && (Utilities::FileSystem::makeDir(sim->getCachePath()) == 0)) { BinaryFileWriter binWriter; binWriter.openFile(cacheFileName); binWriter.writeSparseMatrix(obj->m_factorization->m_D); binWriter.writeSparseMatrix(obj->m_factorization->m_DT_K); binWriter.write(obj->m_factorization->m_dt); binWriter.write(obj->m_factorization->m_mu); binWriter.writeSparseMatrix(obj->m_factorization->m_matHTH); #ifdef USE_AVX obj->m_factorization->m_cholesky->save(binWriter); #else binWriter.writeSparseMatrix(obj->m_factorization->m_matL); binWriter.writeSparseMatrix(obj->m_factorization->m_matLT); binWriter.writeMatrixX(obj->m_factorization->m_permInd); binWriter.writeMatrixX(obj->m_factorization->m_permInvInd); #endif binWriter.closeFile(); } } obj->m_md5 = md5; } } void Elasticity_Kugelstadt2021::initFactorization(std::shared_ptr factorization, std::vector& particleIndices, const unsigned int nFixed, const Real dt, const Real mu) { Simulation* sim = Simulation::getCurrent(); const unsigned int fluidModelIndex = m_model->getPointSetIndex(); factorization->m_dt = dt; factorization->m_mu = mu; // init mapping to find the particle indices in the current particle group (object) std::vector groupInv; groupInv.resize(m_model->numActiveParticles()); int numParticles = (int)particleIndices.size(); std::vector& group = particleIndices; for (int i = 0; i < numParticles; i++) groupInv[group[i]] = i; // determine total number of neighbors int totalNeighbors = 0; for (int i = 0; i < (int)numParticles; i++) { int particleIndex = group[i]; const int numNeighbors = (int) m_initialNeighbors[i].size(); totalNeighbors += numNeighbors; } // init triplets for matrices D, K, M std::vector> triplets_D; std::vector> triplets_K; std::vector> triplets_M; triplets_D.reserve(2 * 3 * totalNeighbors); triplets_K.reserve(3 * numParticles); triplets_M.reserve(numParticles); std::vector> triplets_H; triplets_H.reserve(totalNeighbors + numParticles); std::vector> triplets_K2; triplets_K2.reserve(totalNeighbors); const double dtd = dt; const double mud = mu; const double alphad = m_alpha; // init matrices D and K unsigned int row_index = 0; for (int i = 0; i < (int)numParticles; i++) { int particleIndex0 = group[i]; const unsigned int i0 = particleIndex0; unsigned int particleIndex = m_initial_to_current_index[i0]; const double restVolumes_id = m_restVolumes[i]; // init matrix K according to Eq. 13 in the paper // Set triplets for the matrix K. Directly multiply the factor 2*dt*dt into K const double Kreal = 2.0 * dtd * dtd * mud * restVolumes_id; // init mass matrix // all particles have the same mass => M = mass*I triplets_M.push_back(Eigen::Triplet(i, i, m_model->getMass(particleIndex))); for (int j = 0; j < 3; j++) triplets_K.push_back(Eigen::Triplet(3 * i + j, 3 * i + j, Kreal)); const Eigen::Vector3d xi0d = m_model->getPosition0(i0).cast(); const size_t numNeighbors = m_initialNeighbors[i0].size(); for (unsigned int j = 0; j < numNeighbors; j++) { const unsigned int neighborIndex0 = m_initialNeighbors[i0][j]; const unsigned int neighborIndex = m_initial_to_current_index[neighborIndex0]; const Eigen::Vector3d xj0d = m_model->getPosition0(neighborIndex0).cast(); const Eigen::Vector3d correctedKernelGradient = m_L[particleIndex].cast() * sim->gradW((xi0d - xj0d).cast()).cast(); const double restVolumes_jd = m_restVolumes[neighborIndex]; const Eigen::Vector3d y = restVolumes_jd * correctedKernelGradient; // init matrix D according to Eq. 3 and 4 in the supplemental document of the paper triplets_D.push_back(Eigen::Triplet(3 * i + 0, i, -y[0])); triplets_D.push_back(Eigen::Triplet(3 * i + 1, i, -y[1])); triplets_D.push_back(Eigen::Triplet(3 * i + 2, i, -y[2])); triplets_D.push_back(Eigen::Triplet(3 * i + 0, groupInv[neighborIndex0], y[0])); triplets_D.push_back(Eigen::Triplet(3 * i + 1, groupInv[neighborIndex0], y[1])); triplets_D.push_back(Eigen::Triplet(3 * i + 2, groupInv[neighborIndex0], y[2])); double sum = 0.0; const Eigen::Vector3d xi_xj_0 = xi0d - xj0d; const double xixj0_l2 = xi_xj_0.squaredNorm(); const double beta = 1.0; // init matrix \tilde K * dt^2 according to Eq. 2 in the paper const double Kreal2 = alphad * dtd * dtd * mud * static_cast(m_restVolumes[i]) * static_cast(m_restVolumes[neighborIndex]) * (sim->W(xi_xj_0.cast()) / xixj0_l2); triplets_K2.push_back(Eigen::Triplet(row_index, row_index, Kreal2)); // init matrix H according to Eq. 5, 6 and 7 in the supplemental document of the paper for (unsigned int k = 0; k < numNeighbors; k++) { const unsigned int kIndex0 = m_initialNeighbors[i0][k]; const unsigned int kIndex = m_initial_to_current_index[kIndex0]; const Eigen::Vector3d xk0d = m_model->getPosition0(kIndex0).cast(); const Eigen::Vector3d correctedKernelGradientd = m_L[particleIndex].cast() * sim->gradW((xi0d - xk0d).cast()).cast(); const double restVolumes_kd = m_restVolumes[kIndex]; const double H3j = beta * restVolumes_kd * correctedKernelGradientd.dot(xi0d - xj0d); triplets_H.push_back(Eigen::Triplet(row_index, groupInv[kIndex0], H3j)); sum -= H3j; if (k == j) { triplets_H.push_back(Eigen::Triplet(row_index, groupInv[kIndex0], beta)); } } triplets_H.push_back(Eigen::Triplet(row_index, i, sum - beta)); row_index++; } } Eigen::SparseMatrix D(3 * numParticles, numParticles); factorization->m_D.resize(3 * numParticles, numParticles); D.setFromTriplets(triplets_D.begin(), triplets_D.end()); factorization->m_D = D.cast(); //set matrices Eigen::SparseMatrix K(3 * numParticles, 3 * numParticles); // actually 2 * dt* dt * K K.setFromTriplets(triplets_K.begin(), triplets_K.end()); Eigen::SparseMatrix M(numParticles, numParticles); M.setFromTriplets(triplets_M.begin(), triplets_M.end()); Eigen::SparseMatrix DT_K(numParticles, 3 * numParticles); factorization->m_DT_K.resize(numParticles, 3 * numParticles); DT_K = D.transpose() * K; factorization->m_DT_K = DT_K.cast(); Eigen::SparseMatrix K2(totalNeighbors, totalNeighbors); K2.setFromTriplets(triplets_K2.begin(), triplets_K2.end()); Eigen::SparseMatrix H(totalNeighbors, numParticles); H.setFromTriplets(triplets_H.begin(), triplets_H.end()); Eigen::SparseMatrix HTH(numParticles, numParticles); factorization->m_matHTH.resize(numParticles, numParticles); HTH = H.transpose() * K2 * H; factorization->m_matHTH = HTH.cast(); LOG_INFO << "Non zero elements (H^T * K * H): " << factorization->m_matHTH.nonZeros(); Eigen::SparseMatrix DT_K_D = DT_K * D; Eigen::SparseMatrix M_plus_DT_K_D; // init linear system matrix according to Eq. 29 in the paper if (m_alpha != 0.0) M_plus_DT_K_D = (M + DT_K_D + HTH).block(0, 0, numParticles - nFixed, numParticles - nFixed); else // no zero energy mode control M_plus_DT_K_D = (M + DT_K_D).block(0, 0, numParticles - nFixed, numParticles - nFixed); M_plus_DT_K_D.makeCompressed(); #ifdef USE_AVX // compute factorization of the matrix delete factorization->m_cholesky; factorization->m_cholesky = new CholeskyAVXSolver(M_plus_DT_K_D); #else SolverLLT* solverLLT = new SolverLLT(); solverLLT->compute(M_plus_DT_K_D); if (solverLLT->info() != Eigen::Success) { LOG_ERR << "Cholesky decomposition failed."; LOG_ERR << solverLLT->info(); return; } factorization->m_permInd = solverLLT->permutationP().indices(); factorization->m_permInvInd = solverLLT->permutationPinv().indices(); factorization->m_matL = Eigen::SparseMatrix(solverLLT->matrixL().cast()); factorization->m_matLT = Eigen::SparseMatrix(solverLLT->matrixU().cast()); delete solverLLT; #endif LOG_INFO << "Non zero elements (A): " << M_plus_DT_K_D.nonZeros(); } void Elasticity_Kugelstadt2021::step() { // apply accelerations const unsigned int numParticles = m_model->numActiveParticles(); const Real dt = TimeManager::getCurrent()->getTimeStepSize(); #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { if (m_model->getParticleState(i) == ParticleState::Active) { Vector3r& vel = m_model->getVelocity(i); vel += dt * m_model->getAcceleration(i); m_model->getAcceleration(i).setZero(); } } } START_TIMING("Elasticity") stepElasticitySolver(); // elasticity solver using the factorization stepVolumeSolver(); // volume solver using a cg method STOP_TIMING_AVG } void Elasticity_Kugelstadt2021::reset() { initValues(); } void Elasticity_Kugelstadt2021::performNeighborhoodSearchSort() { const unsigned int numPart = m_model->numActiveParticles(); if (numPart == 0) return; Simulation *sim = Simulation::getCurrent(); auto const& d = sim->getNeighborhoodSearch()->point_set(m_model->getPointSetIndex()); d.sort_field(&m_rotations[0]); d.sort_field(&m_current_to_initial_index[0]); d.sort_field(&m_L[0]); d.sort_field(&m_restVolumes[0]); d.sort_field(&m_vDiff[0]); for (unsigned int i = 0; i < numPart; i++) m_initial_to_current_index[m_current_to_initial_index[i]] = i; #ifdef USE_AVX // update quaterions which are needed for warmstart rotationMatricesToAVXQuaternions(); #endif } void Elasticity_Kugelstadt2021::computeMatrixL() { Simulation *sim = Simulation::getCurrent(); const unsigned int numParticles = m_model->numActiveParticles(); const unsigned int fluidModelIndex = m_model->getPointSetIndex(); #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = m_current_to_initial_index[i]; const Vector3r &xi0 = m_model->getPosition0(i0); Matrix3r L; L.setZero(); const size_t numNeighbors = m_initialNeighbors[i0].size(); // Fluid for (unsigned int j = 0; j < numNeighbors; j++) { // get initial neighbor index considering the current particle order const unsigned int neighborIndex0 = m_initialNeighbors[i0][j]; const unsigned int neighborIndex = m_initial_to_current_index[neighborIndex0]; const Vector3r &xj0 = m_model->getPosition0(neighborIndex0); const Vector3r xj_xi_0 = xj0 - xi0; const Vector3r gradW = sim->gradW(xj_xi_0); // minus because gradW(xij0) == -gradW(xji0) L -= m_restVolumes[neighborIndex] * gradW * xj_xi_0.transpose(); } // add 1 to z-component. otherwise we get a singular matrix in 2D if (sim->is2DSimulation()) L(2, 2) = 1.0; bool invertible = false; L.computeInverseWithCheck(m_L[i], invertible, static_cast(1e-9)); if (!invertible) { LOG_INFO << "Matrix not invertible."; MathFunctions::pseudoInverse(L, m_L[i]); //m_L[i] = Matrix3r::Identity(); } } } } void Elasticity_Kugelstadt2021::saveState(BinaryFileWriter &binWriter) { binWriter.writeBuffer((char*)m_current_to_initial_index.data(), m_current_to_initial_index.size() * sizeof(unsigned int)); binWriter.writeBuffer((char*)m_initial_to_current_index.data(), m_initial_to_current_index.size() * sizeof(unsigned int)); binWriter.writeBuffer((char*)m_L.data(), m_L.size() * sizeof(Matrix3r)); binWriter.writeBuffer((char*)m_rotations.data(), m_rotations.size() * sizeof(Matrix3r)); binWriter.writeBuffer((char*)m_vDiff.data(), m_vDiff.size() * sizeof(Vector3r)); } void Elasticity_Kugelstadt2021::loadState(BinaryFileReader &binReader) { binReader.readBuffer((char*)m_current_to_initial_index.data(), m_current_to_initial_index.size() * sizeof(unsigned int)); binReader.readBuffer((char*)m_initial_to_current_index.data(), m_initial_to_current_index.size() * sizeof(unsigned int)); binReader.readBuffer((char*)m_L.data(), m_L.size() * sizeof(Matrix3r)); binReader.readBuffer((char*)m_rotations.data(), m_rotations.size() * sizeof(Matrix3r)); binReader.readBuffer((char*)m_vDiff.data(), m_vDiff.size() * sizeof(Vector3r)); #ifdef USE_AVX // update quaternions which are needed for warmstart rotationMatricesToAVXQuaternions(); #endif } void Elasticity_Kugelstadt2021::stepVolumeSolver() { const unsigned int numParticles = m_model->numActiveParticles(); if (numParticles == 0) return; const Real dt = TimeManager::getCurrent()->getTimeStepSize(); // extract current rotations START_TIMING("computeRotations"); computeRotations(); STOP_TIMING_AVG; // precompute some values to improve performance START_TIMING("precomputeValues"); precomputeValues(); STOP_TIMING_AVG; START_TIMING("Volume solver") // Init linear system solver MatrixReplacement A(3 * m_model->numActiveParticles(), matrixVecProd, (void*)this); m_solver.setTolerance(m_maxErrorV); m_solver.setMaxIterations(m_maxIterV); m_solver.compute(A); VectorXr b(3 * numParticles); VectorXr x(3 * numParticles); VectorXr g(3 * numParticles); // Compute right hand side of the system in Eq. 30 computeRHS(b); // warmstart #pragma omp parallel for schedule(static) for (int i = 0; i < (int)numParticles; i++) { if (m_model->getParticleState(i) == ParticleState::Active) g.segment<3>(3 * i) = m_model->getVelocity(i) + m_vDiff[i]; else { b.segment<3>(3 * i) = m_model->getVelocity(i); g.segment<3>(3 * i) = m_model->getVelocity(i); } } // Solve linear system START_TIMING("Volume - CG solve"); x = m_solver.solveWithGuess(b, g); m_iterationsV = (int)m_solver.iterations(); STOP_TIMING_AVG; INCREASE_COUNTER("Elasticity - CG iterations", static_cast(m_iterationsV)); #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { if (m_model->getParticleState(i) == ParticleState::Active) { Vector3r& vi = m_model->getVelocity(i); const Vector3r new_vi = x.segment<3>(3 * i); m_vDiff[i] = new_vi - vi; vi = new_vi; } } } STOP_TIMING_AVG; } #ifdef USE_AVX void Elasticity_Kugelstadt2021::rotationMatricesToAVXQuaternions() { size_t numObjects = m_objects.size(); for (auto objIndex = 0; objIndex < numObjects; objIndex++) { ElasticObject* obj = m_objects[objIndex]; const std::vector& group = obj->m_particleIndices; int numParticles = (int)group.size(); int vecSize; if (numParticles % 8 == 0) vecSize = numParticles / 8; else vecSize = numParticles / 8 + 1; auto& quats = obj->m_quats_avx; #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < vecSize; i++) { const int count = std::min(numParticles - i * 8, 8); // store the rotation matrices of 8 particles in avx vectors int idx[8]; for (int j = 0; j < count; j++) idx[j] = m_initial_to_current_index[group[8 * i + j]]; for (int j = count; j < 8; j++) idx[j] = 0; Quaternionr q[8]; for (auto j = 0; j < count; j++) q[j] = Quaternionr(m_rotations[idx[j]]).normalized(); for (auto j = count; j < 8; j++) q[j] = Quaternionr(1,0,0,0); Quaternion8f& q_avx = quats[i]; q_avx.set(q); } } } } void Elasticity_Kugelstadt2021::computeRotations() { const Real dt = TimeManager::getCurrent()->getTimeStepSize(); size_t numObjects = m_objects.size(); for (auto objIndex = 0; objIndex < numObjects; objIndex++) { ElasticObject* obj = m_objects[objIndex]; const std::vector& group = obj->m_particleIndices; int numParticles = (int)group.size(); int vecSize; if (numParticles % 8 == 0) vecSize = numParticles / 8; else vecSize = numParticles / 8 + 1; auto& D = obj->m_factorization->m_D; auto& f_avx = obj->m_f_avx; auto& sol_avx = obj->m_sol_avx; auto& quats = obj->m_quats_avx; // advect particles #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Vector3r& xi0 = m_model->getPosition0(i0); const size_t numNeighbors = m_initialNeighbors[i0].size(); Vector3r xNew = m_model->getPosition(particleIndex); xNew += dt * m_model->getVelocity(particleIndex); // copy the 3 coordinates to sol_avx sol_avx[i] = Scalarf8(xNew[0], xNew[1], xNew[2], 0, 0, 0, 0, 0); f_avx[3 * i].setZero(); f_avx[3 * i + 1].setZero(); f_avx[3 * i + 2].setZero(); } // compute sparse matrix-vector product in parallel #pragma omp for schedule(static) for (int k = 0; k < D.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(D, k); it; ++it) { f_avx[it.row()] += Scalarf8(it.value()) * sol_avx[it.col()]; } } #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; // copy data from f_avx to m_F float x0[8], x1[8], x2[8]; f_avx[3 * i].store(x0); f_avx[3 * i + 1].store(x1); f_avx[3 * i + 2].store(x2); m_F[particleIndex] << x0[0], x0[1], x0[2], x1[0], x1[1], x1[2], x2[0], x2[1], x2[2]; } #pragma omp for schedule(static) for (int i = 0; i < vecSize; i++) { const int count = std::min(numParticles - i * 8, 8); // store the deformation gradient of 8 particles in avx vectors int idx[8]; for (int j = 0; j < count; j++) idx[j] = m_initial_to_current_index[group[8 * i + j]]; for (int j = count; j < 8; j++) idx[j] = 0; Vector3f8 F1, F2, F3; //columns of the deformation gradient F1 = Vector3f8(m_F[idx[0]].col(0), m_F[idx[1]].col(0), m_F[idx[2]].col(0), m_F[idx[3]].col(0), m_F[idx[4]].col(0), m_F[idx[5]].col(0), m_F[idx[6]].col(0), m_F[idx[7]].col(0)); F2 = Vector3f8(m_F[idx[0]].col(1), m_F[idx[1]].col(1), m_F[idx[2]].col(1), m_F[idx[3]].col(1), m_F[idx[4]].col(1), m_F[idx[5]].col(1), m_F[idx[6]].col(1), m_F[idx[7]].col(1)); F3 = Vector3f8(m_F[idx[0]].col(2), m_F[idx[1]].col(2), m_F[idx[2]].col(2), m_F[idx[3]].col(2), m_F[idx[4]].col(2), m_F[idx[5]].col(2), m_F[idx[6]].col(2), m_F[idx[7]].col(2)); // perform analytic polar decomposition Quaternion8f& q = quats[i]; MathFunctions_AVX::APD_Newton_AVX(F1, F2, F3, q); //transform quaternion to rotation matrix Vector3f8 R1, R2, R3; //columns of the rotation matrix quats[i].toRotationMatrix(R1, R2, R3); std::array r0, r1, r2; R1.store(r0.data()); R2.store(r1.data()); R3.store(r2.data()); for (auto j = 0; j < count; j++) { m_rotations[idx[j]].col(0) = r0[j]; m_rotations[idx[j]].col(1) = r1[j]; m_rotations[idx[j]].col(2) = r2[j]; } } } } } void Elasticity_Kugelstadt2021::stepElasticitySolver() { START_TIMING("Elasticity_Kugelstadt2021") const unsigned int numActiveParticles = m_model->numActiveParticles(); if (numActiveParticles == 0) return; const Real dt = TimeManager::getCurrent()->getTimeStepSize(); Simulation* sim = Simulation::getCurrent(); size_t numObjects = m_objects.size(); // solve the systems for each object separately for (auto objIndex = 0; objIndex < numObjects; objIndex++) { ElasticObject* obj = m_objects[objIndex]; const std::vector& group = obj->m_particleIndices; int numParticles = (int)group.size(); auto& D = obj->m_factorization->m_D; auto& DT_K = obj->m_factorization->m_DT_K; auto& HT_K_H = obj->m_factorization->m_matHTH; auto& RHS = obj->m_RHS; auto& f_avx = obj->m_f_avx; auto& sol_avx = obj->m_sol_avx; auto& quats = obj->m_quats_avx; START_TIMING("advect x & Dx") #pragma omp parallel default(shared) { // advect particles to get \tilde x in Eq. 29: // store the 3 components of the advected positions in f_avx // store the 3 components of dt*velocity in v_avx #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const size_t numNeighbors = m_initialNeighbors[i0].size(); const Real fdt = obj->m_factorization->m_dt; const Vector3r x = m_model->getPosition(particleIndex); const Vector3r dv = fdt * m_model->getVelocity(particleIndex); const Vector3r xNew = x + dv; // copy the 3 coordinates to sol_avx sol_avx[i] = Scalarf8(xNew[0], xNew[1], xNew[2], 0, 0, 0, 0, 0); f_avx[3 * i].setZero(); f_avx[3 * i + 1].setZero(); f_avx[3 * i + 2].setZero(); } // compute deformation gradient F by sparse matrix-vector product in parallel: // // f_avx = D * x_advected (Eq. 12 and Eq. 29) #pragma omp for schedule(static) for (int k = 0; k < D.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(D, k); it; ++it) { f_avx[it.row()] += Scalarf8(it.value()) * sol_avx[it.col()]; } } } STOP_TIMING_AVG; int vecSize; if (numParticles % 8 == 0) vecSize = numParticles / 8; else vecSize = numParticles / 8 + 1; START_TIMING("extract rot"); #pragma omp parallel default(shared) { // extract deformation gradient from avx values f_avx #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; // copy data from f_avx to m_F float x0[8], x1[8], x2[8]; f_avx[3*i].store(x0); f_avx[3*i+1].store(x1); f_avx[3*i+2].store(x2); m_F[particleIndex] << x0[0], x0[1], x0[2], x1[0], x1[1], x1[2], x2[0], x2[1], x2[2]; } // extract rotation from F by analytic polar decomposition (Kugelstadt et al. 2018) #pragma omp for schedule(static) for (int i = 0; i < vecSize; i++) { const int count = std::min(numParticles - i*8, 8); // store the deformation gradient of 8 particles in avx vectors int idx[8]; for (int j=0; j < count; j++) idx[j] = m_initial_to_current_index[group[8*i + j]]; for (int j = count; j < 8; j++) idx[j] = 0; Vector3f8 F1, F2, F3; //columns of the deformation gradient F1 = Vector3f8(m_F[idx[0]].col(0), m_F[idx[1]].col(0), m_F[idx[2]].col(0), m_F[idx[3]].col(0), m_F[idx[4]].col(0), m_F[idx[5]].col(0), m_F[idx[6]].col(0), m_F[idx[7]].col(0)); F2 = Vector3f8(m_F[idx[0]].col(1), m_F[idx[1]].col(1), m_F[idx[2]].col(1), m_F[idx[3]].col(1), m_F[idx[4]].col(1), m_F[idx[5]].col(1), m_F[idx[6]].col(1), m_F[idx[7]].col(1)); F3 = Vector3f8(m_F[idx[0]].col(2), m_F[idx[1]].col(2), m_F[idx[2]].col(2), m_F[idx[3]].col(2), m_F[idx[4]].col(2), m_F[idx[5]].col(2), m_F[idx[6]].col(2), m_F[idx[7]].col(2)); // perform polar decomposition Quaternion8f& q = quats[i]; MathFunctions_AVX::APD_Newton_AVX(F1, F2, F3, q); //transform quaternion to rotation matrix Vector3f8 R1, R2, R3; //columns of the rotation matrix quats[i].toRotationMatrix(R1, R2, R3); // R := R-F R1 -= F1; R2 -= F2; R3 -= F3; // store result in f_avx // f_avx has size 3*n and 3 rows contain F-R std::array v0, v1, v2; R1.store(v0.data()); R2.store(v1.data()); R3.store(v2.data()); for (auto j = 0; j < count; j++) { f_avx[24*i + 3*j] = Scalarf8(v0[j][0], v1[j][0], v2[j][0], 0, 0, 0, 0, 0); f_avx[24*i + 3*j + 1] = Scalarf8(v0[j][1], v1[j][1], v2[j][1], 0, 0, 0, 0, 0); f_avx[24*i + 3*j + 2] = Scalarf8(v0[j][2], v1[j][2], v2[j][2], 0, 0, 0, 0, 0); if (8*i+j < RHS.size()) RHS[8*i+j] = Scalarf8(0.0f); } } } STOP_TIMING_AVG // Compute right hand side START_TIMING("rhs") #pragma omp parallel default(shared) { // Compute D^T K * (R-F) (Eq. 29) // Note: K already contains the factor: 2 dt^2 #pragma omp for schedule(static) for (int k = 0; k < DT_K.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(DT_K, k); it; ++it) { if (it.row() < (int) RHS.size()) RHS[it.row()] += Scalarf8(it.value()) * f_avx[it.col()]; } } } // If zero energy model control is turned on, // add the following term to the right hand side (Eq. 29): // H^T * K2 * H * x_advected // Note: K2 already contains the factor: dt^2 if (m_alpha != 0.0) { #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int k = 0; k < HT_K_H.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(HT_K_H, k); it; ++it) { if (it.col() < (int)RHS.size()) RHS[it.col()] -= Scalarf8(static_cast(it.value())) * sol_avx[it.row()]; } } } } // Copy the right hand side in a vector for the cholesky solver #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)RHS.size(); i++) { float x[8]; RHS[i].store(x); obj->m_rhs[3 * i] = x[0]; obj->m_rhs[3 * i + 1] = x[1]; obj->m_rhs[3 * i + 2] = x[2]; } } STOP_TIMING_AVG; } // Solve linear system START_TIMING("solve SLE") #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < 3*numObjects; i++) { int objIndex = i / 3; int index = i % 3; ElasticObject* obj = m_objects[objIndex]; obj->m_factorization->m_cholesky->solve(&obj->m_sol[index], &obj->m_rhs[index], /* stride = */ 3); } } for (auto objIndex = 0; objIndex < numObjects; objIndex++) { ElasticObject* obj = m_objects[objIndex]; const std::vector& group = obj->m_particleIndices; #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int) obj->m_sol.size()/3; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; if (m_model->getParticleState(particleIndex) == ParticleState::Active) { Vector3r& vi = m_model->getVelocity(particleIndex); const Vector3r &dx = obj->m_sol.segment<3>(3*i); const Real fdt = obj->m_factorization->m_dt; vi += (1.0 / fdt) * dx; } } } } STOP_TIMING_AVG STOP_TIMING_AVG } void Elasticity_Kugelstadt2021::matrixVecProd(const Real* vec, Real *result, void *userData) { Simulation *sim = Simulation::getCurrent(); Elasticity_Kugelstadt2021* elasticity = static_cast(userData); FluidModel *model = elasticity->getModel(); const unsigned int numParticles = model->numActiveParticles(); const Real dt = TimeManager::getCurrent()->getTimeStepSize(); const auto ¤t_to_initial_index = elasticity->m_current_to_initial_index; const auto &initial_to_current_index = elasticity->m_initial_to_current_index; const auto &initialNeighbors = elasticity->m_initialNeighbors; const auto &restVolumes = elasticity->m_restVolumes; auto &stress = elasticity->m_stress; const auto& precomp_RL_gradW8 = elasticity->m_precomp_RL_gradW8; const auto& precomp_RLj_gradW8 = elasticity->m_precomp_RLj_gradW8; const auto& precomputed_indices8 = elasticity->m_precomputed_indices8; #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = current_to_initial_index[i]; const Vector3r& pi = Eigen::Map(&vec[3 * i], 3); const unsigned int numNeighbors = (unsigned int)initialNeighbors[i0].size(); const Vector3f8 pi_avx(pi); // compute corotated deformation gradient Scalarf8 trace_avx; trace_avx.setZero(); for (unsigned int j = 0; j < numNeighbors; j += 8) { const unsigned int count = std::min(numNeighbors - j, 8u); unsigned int nIndices[8]; for (auto k = 0u; k < count; k++) nIndices[k] = initial_to_current_index[initialNeighbors[i0][j + k]]; const Vector3f8 pj_avx = convertVec_zero(nIndices, &vec[0], count); const Vector3f8 pj_pi = pj_avx - pi_avx; const Vector3f8& correctedRotatedKernel = precomp_RL_gradW8[precomputed_indices8[i] + j / 8]; // We need the trace of the strain tensor. Therefore, we are only interested in the // diagonal elements which are F_ii-1. However, instead of computing the trace // of a dyadic product, we can determine the dot product of the vectors to get the trace // of F. Then the trace of the strain is the result minus 3. trace_avx += pj_pi.dot(correctedRotatedKernel); } Real trace = trace_avx.reduce(); trace *= dt; // First Piola Kirchhoff stress = lambda trace(epsilon) I stress[i] = elasticity->m_lambda * trace; } } #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { if (model->getParticleState(i) == ParticleState::Active) { const unsigned int i0 = current_to_initial_index[i]; const unsigned int numNeighbors = (unsigned int) initialNeighbors[i0].size(); const Scalarf8 stress_i_avx(stress[i]); const Scalarf8 restVolume_i_avx(restVolumes[i]); // Compute elastic force f^l_i in Eq. 30 Vector3f8 force_avx; force_avx.setZero(); for (unsigned int j = 0; j < numNeighbors; j+=8) { const unsigned int count = std::min(numNeighbors - j, 8u); unsigned int nIndices[8]; for (auto k = 0u; k < count; k++) nIndices[k] = initial_to_current_index[initialNeighbors[i0][j + k]]; const Scalarf8 stress_j_avx = convert_zero(nIndices, &stress[0], count); const Scalarf8 restVolume_j_avx = convert_zero(nIndices, &restVolumes[0], count); const Vector3f8& V_RL_gradWi = precomp_RL_gradW8[precomputed_indices8[i] + j / 8]; const Vector3f8& V_RL_gradWj = precomp_RLj_gradW8[precomputed_indices8[i] + j / 8]; force_avx += (V_RL_gradWi * (restVolume_i_avx * stress_i_avx) + V_RL_gradWj * (restVolume_j_avx * stress_j_avx)); } const Real factor = dt / model->getMass(i); result[3 * i] = vec[3 * i] - factor * force_avx.x().reduce(); result[3 * i + 1] = vec[3 * i + 1] - factor * force_avx.y().reduce(); result[3 * i + 2] = vec[3 * i + 2] - factor * force_avx.z().reduce(); } else { result[3 * i] = vec[3 * i]; result[3 * i + 1] = vec[3 * i + 1]; result[3 * i + 2] = vec[3 * i + 2]; } } } } void Elasticity_Kugelstadt2021::computeF(const unsigned int i, const Real* x, Elasticity_Kugelstadt2021 *e) { const unsigned int i0 = e->m_current_to_initial_index[i]; const Vector3r& vi = Eigen::Map(&x[3 * i], 3); const unsigned int numNeighbors = (unsigned int)e->m_initialNeighbors[i0].size(); const Vector3f8 vi_avx(vi); // compute corotated deformation gradient (Eq. 12) Matrix3f8 F_avx; F_avx.setZero(); // Fluid for (unsigned int j = 0; j < numNeighbors; j += 8) { const unsigned int count = std::min(numNeighbors - j, 8u); unsigned int nIndices[8]; for (auto k = 0u; k < count; k++) nIndices[k] = e->m_initial_to_current_index[e->m_initialNeighbors[i0][j + k]]; const Vector3f8 vj_avx = convertVec_zero(nIndices, &x[0], count); const Vector3f8 vj_vi = vj_avx - vi_avx; const Vector3f8& correctedKernel = e->m_precomp_L_gradW8[e->m_precomputed_indices8[i] + j / 8]; Matrix3f8 dyad; dyadicProduct(vj_vi, correctedKernel, dyad); F_avx += dyad; } e->m_F[i] = F_avx.reduce(); if (Simulation::getCurrent()->is2DSimulation()) e->m_F[i](2, 2) = 1.0; } void Elasticity_Kugelstadt2021::computeRHS(VectorXr & rhs) { Simulation *sim = Simulation::getCurrent(); const unsigned int numParticles = m_model->numActiveParticles(); const unsigned int fluidModelIndex = m_model->getPointSetIndex(); FluidModel *model = m_model; const Real dt = TimeManager::getCurrent()->getTimeStepSize(); #pragma omp parallel default(shared) { // update the deformation gradient #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { if (model->getParticleState(i) == ParticleState::Active) computeF(i, &m_model->getPosition(0)[0], this); } #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { if (m_model->getParticleState(i) == ParticleState::Active) { // trace of R^T F - I //const Matrix3r RTF = (m_rotations[i].transpose() * m_F[i]); //const Real trace = RTF(0, 0) + RTF(1, 1) + RTF(2, 2) - 3.0; // short form of: trace(R^T F - I) const Real trace = m_rotations[i].row(0).dot(m_F[i].col(0)) + m_rotations[i].row(1).dot(m_F[i].col(1)) + m_rotations[i].row(2).dot(m_F[i].col(2)) - static_cast(3.0); m_stress[i] = m_lambda * trace; } else m_stress[i] = 0.0; } } #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { if (model->getParticleState(i) == ParticleState::Active) { const unsigned int i0 = m_current_to_initial_index[i]; const unsigned int numNeighbors = (unsigned int)m_initialNeighbors[i0].size(); const Scalarf8 stress_i_avx(m_stress[i]); const Scalarf8 restVolume_i_avx(m_restVolumes[i]); // Compute elastic force (Eq. 30) Vector3f8 force_avx; force_avx.setZero(); for (unsigned int j = 0; j < numNeighbors; j += 8) { const unsigned int count = std::min(numNeighbors - j, 8u); unsigned int nIndices[8]; for (auto k = 0u; k < count; k++) nIndices[k] = m_initial_to_current_index[m_initialNeighbors[i0][j + k]]; const Scalarf8 stress_j_avx = convert_zero(nIndices, &m_stress[0], count); const Scalarf8 restVolume_j_avx = convert_zero(nIndices, &m_restVolumes[0], count); const Vector3f8& correctedRotatedKernel_i = m_precomp_RL_gradW8[m_precomputed_indices8[i] + j / 8]; const Vector3f8& correctedRotatedKernel_j = m_precomp_RLj_gradW8[m_precomputed_indices8[i] + j / 8]; const Vector3f8 PWi = correctedRotatedKernel_i * stress_i_avx; const Vector3f8 PWj = correctedRotatedKernel_j * stress_j_avx; force_avx += (PWi * restVolume_i_avx + PWj * restVolume_j_avx); } Vector3r force; force[0] = force_avx.x().reduce(); force[1] = force_avx.y().reduce(); force[2] = force_avx.z().reduce(); rhs.segment<3>(3 * i) = model->getVelocity(i) + dt * (1.0 / model->getMass(i) * force); } else { rhs.segment<3>(3 * i) = model->getVelocity(i); } } } } #else void Elasticity_Kugelstadt2021::computeRotations() { const Real dt = TimeManager::getCurrent()->getTimeStepSize(); size_t numObjects = m_objects.size(); for (auto objIndex = 0; objIndex < numObjects; objIndex++) { ElasticObject* obj = m_objects[objIndex]; const std::vector& group = obj->m_particleIndices; int numParticles = (int)group.size(); auto& D = obj->m_factorization->m_D; auto& f = obj->m_f; auto& sol = obj->m_sol; auto& quats = obj->m_quats; // advect particles #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Vector3r& xi0 = m_model->getPosition0(i0); const size_t numNeighbors = m_initialNeighbors[i0].size(); Vector3r xNew = m_model->getPosition(particleIndex); xNew += dt * m_model->getVelocity(particleIndex); sol[i] = xNew; f[3 * i].setZero(); f[3 * i + 1].setZero(); f[3 * i + 2].setZero(); } // compute sparse matrix-vector product in parallel #pragma omp for schedule(static) for (int k = 0; k < D.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(D, k); it; ++it) { f[it.row()] += it.value() * sol[it.col()]; } } // extract deformation gradient from values f #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Vector3r &x0 = f[3 * i]; const Vector3r &x1 = f[3 * i + 1]; const Vector3r &x2 = f[3 * i + 2]; m_F[particleIndex] << x0[0], x0[1], x0[2], x1[0], x1[1], x1[2], x2[0], x2[1], x2[2]; Quaternionr& q = quats[i]; MathFunctions::APD_Newton(m_F[particleIndex], q); m_rotations[particleIndex] = q.matrix().transpose(); } } } } void Elasticity_Kugelstadt2021::stepElasticitySolver() { START_TIMING("Elasticity_Kugelstadt2021") const unsigned int numActiveParticles = m_model->numActiveParticles(); if (numActiveParticles == 0) return; const Real dt = TimeManager::getCurrent()->getTimeStepSize(); Simulation* sim = Simulation::getCurrent(); size_t numObjects = m_objects.size(); // solve the systems for each object separately for (auto objIndex = 0; objIndex < numObjects; objIndex++) { ElasticObject* obj = m_objects[objIndex]; const std::vector& group = obj->m_particleIndices; int numParticles = (int)group.size(); auto& D = obj->m_factorization->m_D; auto& DT_K = obj->m_factorization->m_DT_K; auto& HT_K_H = obj->m_factorization->m_matHTH; auto& RHS = obj->m_RHS; auto& RHS_perm = obj->m_RHS_perm; auto& permInd = obj->m_factorization->m_permInd; auto& f = obj->m_f; auto& quats = obj->m_quats; auto& sol = obj->m_sol; START_TIMING("advect x & Dx") #pragma omp parallel default(shared) { // advect particles to get \tilde x in Eq. 29: // store the 3 components of the advected positions in f_avx // store the 3 components of dt*velocity in v_avx #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const size_t numNeighbors = m_initialNeighbors[i0].size(); const Real fdt = obj->m_factorization->m_dt; const Vector3r x = m_model->getPosition(particleIndex); const Vector3r dv = fdt * m_model->getVelocity(particleIndex); const Vector3r xNew = x + dv; sol[i] = xNew; f[3 * i].setZero(); f[3 * i + 1].setZero(); f[3 * i + 2].setZero(); } // compute deformation gradient F by sparse matrix-vector product in parallel: // // f = D * x_advected (Eq. 12 and Eq. 29) #pragma omp for schedule(static) for (int k = 0; k < D.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(D, k); it; ++it) { f[it.row()] += it.value() * sol[it.col()]; } } // extract deformation gradient from values f #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; const Vector3r &x0 = f[3 * i]; const Vector3r &x1 = f[3 * i + 1]; const Vector3r &x2 = f[3 * i + 2]; m_F[particleIndex] << x0[0], x0[1], x0[2], x1[0], x1[1], x1[2], x2[0], x2[1], x2[2]; Quaternionr& q = quats[i]; MathFunctions::APD_Newton(m_F[particleIndex], q); m_rotations[particleIndex] = q.matrix(); f[3 * i] = m_rotations[particleIndex].row(0) - m_F[particleIndex].row(0); f[3 * i + 1] = m_rotations[particleIndex].row(1) - m_F[particleIndex].row(1); f[3 * i + 2] = m_rotations[particleIndex].row(2) - m_F[particleIndex].row(2); if (i < RHS.size()) RHS[i].setZero(); } } STOP_TIMING_AVG; // Compute right hand side START_TIMING("rhs") #pragma omp parallel default(shared) { // Compute D^T K * (R-F) (Eq. 29) // Note: K already contains the factor: 2 dt^2 #pragma omp for schedule(static) for (int k = 0; k < DT_K.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(DT_K, k); it; ++it) { if (it.row() < (int) RHS.size()) RHS[it.row()] += it.value() * f[it.col()]; } } } // If zero energy model control is turned on, // add the following term to the right hand side (Eq. 29): // H^T * K2 * H * x_advected // Note: K2 already contains the factor: dt^2 if (m_alpha != 0.0) { #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int k = 0; k < HT_K_H.outerSize(); ++k) { for (Eigen::SparseMatrix::InnerIterator it(HT_K_H, k); it; ++it) { if (it.col() < (int)RHS.size()) RHS[it.col()] -= it.value() * sol[it.row()]; } } } } #pragma omp parallel default(shared) { //permutation of the RHS because of Eigen's fill-in reduction #pragma omp for schedule(static) for (int i = 0; i < (int)RHS.size(); i++) RHS_perm[permInd[i]] = RHS[i]; } STOP_TIMING_AVG; } // Solve linear system START_TIMING("solve SLE") #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int objIndex = 0; objIndex < (int) numObjects; objIndex++) { ElasticObject* obj = m_objects[objIndex]; auto& RHS_perm = obj->m_RHS_perm; auto& matL = obj->m_factorization->m_matL; auto& matLT = obj->m_factorization->m_matLT; for (int k = 0; k < matL.outerSize(); ++k) for (Eigen::SparseMatrix::InnerIterator it(matL, k); it; ++it) if (it.row() == it.col()) RHS_perm[it.row()] /= it.value(); else RHS_perm[it.row()] -= it.value() * RHS_perm[it.col()]; //backward substitution for (int k = (int) matLT.outerSize() - 1; k >= 0; --k) for (Eigen::SparseMatrix::ReverseInnerIterator it(matLT, k); it; --it) if (it.row() == it.col()) RHS_perm[it.row()] /= it.value(); else RHS_perm[it.row()] -= it.value() * RHS_perm[it.col()]; } } for (auto objIndex = 0; objIndex < numObjects; objIndex++) { ElasticObject* obj = m_objects[objIndex]; const std::vector& group = obj->m_particleIndices; auto& RHS = obj->m_RHS; auto& RHS_perm = obj->m_RHS_perm; auto& permInvInd = obj->m_factorization->m_permInvInd; #pragma omp parallel default(shared) { //invert permutation #pragma omp for schedule(static) for (int i = 0; i < (int)RHS.size(); i++) RHS[permInvInd[i]] = RHS_perm[i]; #pragma omp for schedule(static) for (int i = 0; i < (int)RHS.size(); i++) { const unsigned int i0 = group[i]; const unsigned int particleIndex = m_initial_to_current_index[i0]; if (m_model->getParticleState(particleIndex) == ParticleState::Active) { Vector3r& vi = m_model->getVelocity(particleIndex); const Vector3r &dx = RHS[i]; const Real fdt = obj->m_factorization->m_dt; vi += (1.0 / fdt) * dx; } } } } STOP_TIMING_AVG STOP_TIMING_AVG } void Elasticity_Kugelstadt2021::matrixVecProd(const Real* vec, Real *result, void *userData) { Simulation *sim = Simulation::getCurrent(); Elasticity_Kugelstadt2021* elasticity = static_cast(userData); FluidModel *model = elasticity->getModel(); const unsigned int numParticles = model->numActiveParticles(); const Real dt = TimeManager::getCurrent()->getTimeStepSize(); const auto ¤t_to_initial_index = elasticity->m_current_to_initial_index; const auto &initial_to_current_index = elasticity->m_initial_to_current_index; const auto &initialNeighbors = elasticity->m_initialNeighbors; const auto &restVolumes = elasticity->m_restVolumes; auto &stress = elasticity->m_stress; const auto &precomp_RL_gradW = elasticity->m_precomp_RL_gradW; const auto& precomp_RLj_gradW = elasticity->m_precomp_RLj_gradW; const auto &precomputed_indices = elasticity->m_precomputed_indices; #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { if (model->getParticleState(i) == ParticleState::Active) { const unsigned int i0 = current_to_initial_index[i]; const Vector3r &pi = Eigen::Map(&vec[3 * i], 3); const unsigned int numNeighbors = (unsigned int) initialNeighbors[i0].size(); // compute corotated deformation gradient // Fluid Real trace = 0.0; for (unsigned int j = 0; j < numNeighbors; j++) { const unsigned int neighborIndex = initial_to_current_index[initialNeighbors[i0][j]]; // get initial neighbor index considering the current particle order const unsigned int neighborIndex0 = initialNeighbors[i0][j]; const Vector3r &pj = Eigen::Map(&vec[3 * neighborIndex], 3); const Vector3r pj_pi = pj - pi; const Vector3r correctedRotatedKernel = precomp_RL_gradW[precomputed_indices[i] + j]; // We need the trace of the strain tensor. Therefore, we are only interested in the // diagonal elements which are F_ii-1. However, instead of computing the trace // of a dyadic product, we can determine the dot product of the vectors to get the trace // of F. Then the trace of the strain is the result minus 3. trace += pj_pi.dot(correctedRotatedKernel); } trace *= dt; // First Piola Kirchhoff stress = 2 mu epsilon + lambda trace(epsilon) I stress[i] = elasticity->m_lambda*trace; } else stress[i] = 0.0; } } #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { if (model->getParticleState(i) == ParticleState::Active) { const unsigned int i0 = current_to_initial_index[i]; const size_t numNeighbors = initialNeighbors[i0].size(); // Compute elastic force Vector3r force; force.setZero(); for (unsigned int j = 0; j < numNeighbors; j++) { const unsigned int neighborIndex = initial_to_current_index[initialNeighbors[i0][j]]; // get initial neighbor index considering the current particle order const unsigned int neighborIndex0 = initialNeighbors[i0][j]; const Vector3r correctedRotatedKernel_i = precomp_RL_gradW[precomputed_indices[i] + j]; const Vector3r correctedRotatedKernel_j = -precomp_RLj_gradW[precomputed_indices[i] + j]; const Vector3r PWi = stress[i] * correctedRotatedKernel_i; const Vector3r PWj = stress[neighborIndex] * correctedRotatedKernel_j; force += (restVolumes[i] * PWi - restVolumes[neighborIndex] * PWj); } const Real factor = dt / model->getMass(i); result[3 * i] = vec[3 * i] - factor * force[0]; result[3 * i + 1] = vec[3 * i + 1] - factor * force[1]; result[3 * i + 2] = vec[3 * i + 2] - factor * force[2]; } else { result[3 * i] = vec[3 * i]; result[3 * i + 1] = vec[3 * i + 1]; result[3 * i + 2] = vec[3 * i + 2]; } } } } void Elasticity_Kugelstadt2021::computeRHS(VectorXr & rhs) { Simulation *sim = Simulation::getCurrent(); const unsigned int numParticles = m_model->numActiveParticles(); const unsigned int fluidModelIndex = m_model->getPointSetIndex(); FluidModel *model = m_model; const Real dt = TimeManager::getCurrent()->getTimeStepSize(); #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { m_F[i].setZero(); if (m_model->getParticleState(i) == ParticleState::Active) { // compute corotated deformation gradient (Eq. 12) const unsigned int i0 = m_current_to_initial_index[i]; const Vector3r &xi = m_model->getPosition(i); const size_t numNeighbors = m_initialNeighbors[i0].size(); // Fluid for (unsigned int j = 0; j < numNeighbors; j++) { const unsigned int neighborIndex = m_initial_to_current_index[m_initialNeighbors[i0][j]]; const Vector3r &xj = model->getPosition(neighborIndex); const Vector3r xj_xi = xj - xi; const Vector3r correctedKernel = m_precomp_L_gradW[m_precomputed_indices[i] + j]; m_F[i] += xj_xi * correctedKernel.transpose(); } if (sim->is2DSimulation()) m_F[i](2, 2) = 1.0; // short form of: trace(R^T F - I) const Real trace = m_rotations[i].col(0).dot(m_F[i].col(0)) + m_rotations[i].col(1).dot(m_F[i].col(1)) + m_rotations[i].col(2).dot(m_F[i].col(2)) - static_cast(3.0); m_stress[i] = m_lambda*trace; } else m_stress[i] = 0.0; } } #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { if (model->getParticleState(i) == ParticleState::Active) { const unsigned int i0 = m_current_to_initial_index[i]; const Vector3r& xi0 = m_model->getPosition0(i0); const size_t numNeighbors = m_initialNeighbors[i0].size(); // Compute elastic force Vector3r force; force.setZero(); for (unsigned int j = 0; j < numNeighbors; j++) { const unsigned int neighborIndex = m_initial_to_current_index[m_initialNeighbors[i0][j]]; const Vector3r correctedRotatedKernel_i = m_precomp_RL_gradW[m_precomputed_indices[i] + j]; const Vector3r correctedRotatedKernel_j = m_precomp_RLj_gradW[m_precomputed_indices[i] + j]; const Vector3r PWi = m_stress[i] * correctedRotatedKernel_i; const Vector3r PWj = m_stress[neighborIndex] * correctedRotatedKernel_j; force += (PWi * m_restVolumes[i] + PWj * m_restVolumes[neighborIndex]); } rhs.segment<3>(3 * i) = model->getVelocity(i) + dt * (1.0 / model->getMass(i) * force); } else { rhs.segment<3>(3 * i) = model->getVelocity(i); } } } } #endif void Elasticity_Kugelstadt2021::precomputeValues() { Simulation* sim = Simulation::getCurrent(); const unsigned int fluidModelIndex = m_model->getPointSetIndex(); const int numParticles = (int)m_model->numActiveParticles(); #ifdef USE_AVX m_precomputed_indices8.clear(); m_precomp_L_gradW8.clear(); m_precomp_RL_gradW8.clear(); m_precomp_RLj_gradW8.clear(); m_precomputed_indices8.resize(numParticles); #else m_precomputed_indices.clear(); m_precomp_L_gradW.clear(); m_precomp_RL_gradW.clear(); m_precomp_RLj_gradW.clear(); m_precomputed_indices.resize(numParticles); #endif unsigned int sumNeighborParticles = 0; unsigned int sumNeighborParticles8 = 0; for (int i = 0; i < numParticles; i++) { const unsigned int i0 = m_current_to_initial_index[i]; const size_t numNeighbors = m_initialNeighbors[i0].size(); #ifdef USE_AVX m_precomputed_indices8[i] = sumNeighborParticles8; // steps of 8 values due to avx sumNeighborParticles8 += (unsigned int) numNeighbors / 8u; if (numNeighbors % 8 != 0) sumNeighborParticles8++; #else m_precomputed_indices[i] = sumNeighborParticles; sumNeighborParticles += numNeighbors; #endif } #ifdef USE_AVX m_precomp_L_gradW8.resize(sumNeighborParticles8); m_precomp_RL_gradW8.resize(sumNeighborParticles8); m_precomp_RLj_gradW8.resize(sumNeighborParticles8); #else m_precomp_L_gradW.resize(sumNeighborParticles); m_precomp_RL_gradW.resize(sumNeighborParticles); m_precomp_RLj_gradW.resize(sumNeighborParticles); #endif #pragma omp parallel default(shared) { #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { m_RL[i] = m_rotations[i].transpose() * m_L[i]; } #pragma omp for schedule(static) for (int i = 0; i < (int)numParticles; i++) { const unsigned int i0 = m_current_to_initial_index[i]; const Vector3r& xi0 = m_model->getPosition0(i0); const unsigned int numNeighbors = (unsigned int)m_initialNeighbors[i0].size(); #ifdef USE_AVX const Vector3f8 xi0_avx(xi0); Scalarf8 restVolume_i_avx(m_restVolumes[i]); unsigned int base8 = m_precomputed_indices8[i]; unsigned int idx = 0; Matrix3f8 L_avx(m_L[i]); Matrix3f8 RL_avx(m_RL[i]); for (unsigned int j = 0; j < numNeighbors; j += 8) { const unsigned int count = std::min(numNeighbors - j, 8u); unsigned int nIndices[8]; for (auto k = 0u; k < count; k++) nIndices[k] = m_initial_to_current_index[m_initialNeighbors[i0][j + k]]; const Scalarf8 restVolume_j_avx = convert_zero(nIndices, &m_restVolumes[0], count); const Vector3f8 xj0_avx = convertVec_zero(&m_initialNeighbors[i0][j], &m_model->getPosition0(0), count); const Matrix3f8 RLj_avx = convertMat_zero(nIndices, &m_RL[0], count); const Vector3f8 gradW = CubicKernel_AVX::gradW(xi0_avx - xj0_avx); m_precomp_L_gradW8[base8 + idx] = L_avx * gradW * restVolume_j_avx; m_precomp_RL_gradW8[base8 + idx] = RL_avx * gradW * restVolume_j_avx; m_precomp_RLj_gradW8[base8 + idx] = RLj_avx * gradW * restVolume_i_avx; idx++; } #else unsigned int base = m_precomputed_indices[i]; for (unsigned int j = 0; j < numNeighbors; j++) { // get initial neighbor index considering the current particle order const unsigned int neighborIndex0 = m_initialNeighbors[i0][j]; const unsigned int neighborIndex = m_initial_to_current_index[m_initialNeighbors[i0][j]]; const Vector3r& xj0 = m_model->getPosition0(neighborIndex0); Vector3r gradW = sim->gradW(xi0 - xj0); m_precomp_L_gradW[base + j] = m_restVolumes[neighborIndex] * m_L[i] * gradW; m_precomp_RL_gradW[base + j] = m_restVolumes[neighborIndex] * m_RL[i] * gradW; m_precomp_RLj_gradW[base + j] = m_restVolumes[i] * m_RL[neighborIndex] * gradW; } #endif } } }