OrcaSlicer/src/libigl/igl/svd3x3_sse.cpp
tamasmeszaros 2ae2672ee9 Building igl statically and moving to the dep scripts
Fixing dep build script on Windows and removing some warnings.

Use bundled igl by default.

Not building with the dependency scripts if not explicitly stated. This way, it will stay in
Fix the libigl patch to include C source files in header only mode.
2019-06-19 14:52:55 +02:00

108 lines
4 KiB
C++

// This file is part of libigl, a simple c++ geometry processing library.
//
// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla Public License
// v. 2.0. If a copy of the MPL was not distributed with this file, You can
// obtain one at http://mozilla.org/MPL/2.0/.
#ifdef __SSE__
#include "svd3x3_sse.h"
#include <cmath>
#include <algorithm>
#undef USE_SCALAR_IMPLEMENTATION
#define USE_SSE_IMPLEMENTATION
#undef USE_AVX_IMPLEMENTATION
#define COMPUTE_U_AS_MATRIX
#define COMPUTE_V_AS_MATRIX
#include "Singular_Value_Decomposition_Preamble.hpp"
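// Turn off the compiler's "u" (use of uninitialized variable) runtime checks
// for the code below, which otherwise fire on "xor eax,eax"-style idioms.
// The pragma does not always take effect; if the checks still trigger,
// disable them explicitly in the compiler settings.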
#pragma runtime_checks( "u", off )
// Runs the 3x3 singular value decomposition kernel on four matrices at once,
// one matrix per SSE lane.
//
// Layout (as indexed by the loops below): every 12-row argument is four blocks
// stacked vertically, block k occupying rows 3*k .. 3*k+2.
//
// Inputs:
//   A  four stacked 3x3 input matrices
// Outputs:
//   U  four stacked 3x3 blocks, filled from the kernel's Vu.. values
//   S  four stacked 3-vectors, filled from Va11/Va22/Va33 after the kernel
//      (presumably the singular values left on the diagonal -- confirm
//      against Singular_Value_Decomposition_Main_Kernel_Body.hpp)
//   V  four stacked 3x3 blocks, filled from the kernel's Vv.. values
//
// Note: all staging buffers are float, so data passes through single
// precision regardless of T.
template<typename T>
IGL_INLINE void igl::svd3x3_sse(
const Eigen::Matrix<T, 3*4, 3>& A,
Eigen::Matrix<T, 3*4, 3> &U,
Eigen::Matrix<T, 3*4, 1> &S,
Eigen::Matrix<T, 3*4, 3>&V)
{
// this code assumes USE_SSE_IMPLEMENTATION is defined
// Staging buffers indexed [slot][lane]: slot i + 3*j holds entry (i,j) of
// every block, lane k selects which of the four blocks it belongs to.
float Ashuffle[9][4], Ushuffle[9][4], Vshuffle[9][4], Sshuffle[3][4];
// Regroup A from "stacked blocks" into one 4-float row per matrix entry so
// that each entry of all four matrices can be loaded as a single vector.
for (int i=0; i<3; i++)
{
for (int j=0; j<3; j++)
{
for (int k=0; k<4; k++)
{
Ashuffle[i + j*3][k] = A(i + 3*k, j);
}
}
}
// Textual include: supplies the kernel's local declarations. The Va.., Vu..
// and Vv.. names used below are not declared anywhere in this file.
#include "Singular_Value_Decomposition_Kernel_Declarations.hpp"
// Load the four lanes of each entry. Va<r><c> receives slot (r-1) + 3*(c-1),
// i.e. entry (r,c) of all four input matrices. Unaligned loads are used; the
// staging arrays carry no alignment specifier.
ENABLE_SSE_IMPLEMENTATION(Va11=_mm_loadu_ps(Ashuffle[0]);)
ENABLE_SSE_IMPLEMENTATION(Va21=_mm_loadu_ps(Ashuffle[1]);)
ENABLE_SSE_IMPLEMENTATION(Va31=_mm_loadu_ps(Ashuffle[2]);)
ENABLE_SSE_IMPLEMENTATION(Va12=_mm_loadu_ps(Ashuffle[3]);)
ENABLE_SSE_IMPLEMENTATION(Va22=_mm_loadu_ps(Ashuffle[4]);)
ENABLE_SSE_IMPLEMENTATION(Va32=_mm_loadu_ps(Ashuffle[5]);)
ENABLE_SSE_IMPLEMENTATION(Va13=_mm_loadu_ps(Ashuffle[6]);)
ENABLE_SSE_IMPLEMENTATION(Va23=_mm_loadu_ps(Ashuffle[7]);)
ENABLE_SSE_IMPLEMENTATION(Va33=_mm_loadu_ps(Ashuffle[8]);)
// Textual include: the decomposition kernel itself. NOTE(review): presumably
// it consumes the Va.. values loaded above and produces Vu.., Vv.. and an
// updated Va..; its contents are not visible from this file.
#include "Singular_Value_Decomposition_Main_Kernel_Body.hpp"
// Store the kernel's U entries, using the same slot numbering as the loads.
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[0],Vu11);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[1],Vu21);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[2],Vu31);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[3],Vu12);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[4],Vu22);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[5],Vu32);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[6],Vu13);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[7],Vu23);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[8],Vu33);)
// Store the kernel's V entries, same slot numbering.
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[0],Vv11);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[1],Vv21);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[2],Vv31);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[3],Vv12);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[4],Vv22);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[5],Vv32);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[6],Vv13);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[7],Vv23);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[8],Vv33);)
// Only the three diagonal entries of Va.. are written out; they become S.
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Sshuffle[0],Va11);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Sshuffle[1],Va22);)
ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Sshuffle[2],Va33);)
// Scatter the per-entry rows back into the stacked-block layout of U and V
// (the inverse of the regrouping applied to A above).
for (int i=0; i<3; i++)
{
for (int j=0; j<3; j++)
{
for (int k=0; k<4; k++)
{
U(i + 3*k, j) = Ushuffle[i + j*3][k];
V(i + 3*k, j) = Vshuffle[i + j*3][k];
}
}
}
// Scatter the three stored diagonal values of each lane into rows
// 3*k .. 3*k+2 of S.
for (int i=0; i<3; i++)
{
for (int k=0; k<4; k++)
{
S(i + 3*k, 0) = Sshuffle[i][k];
}
}
}
#pragma runtime_checks( "u", restore )
// forced instantiation
template void igl::svd3x3_sse(const Eigen::Matrix<float, 3*4, 3>& A, Eigen::Matrix<float, 3*4, 3> &U, Eigen::Matrix<float, 3*4, 1> &S, Eigen::Matrix<float, 3*4, 3>&V);
//// doesn't even make sense with double because the wunder-SVD code is only single precision anyway...
//template void wunderSVD3x3_SSE<float>(Eigen::Matrix<float, 12, 3, 0, 12, 3> const&, Eigen::Matrix<float, 12, 3, 0, 12, 3>&, Eigen::Matrix<float, 12, 1, 0, 12, 1>&, Eigen::Matrix<float, 12, 3, 0, 12, 3>&);
#endif