Mirror of https://github.com/SoftFever/OrcaSlicer.git (synced 2025-08-06 13:34:05 -06:00)
Upgraded the bundled Eigen to 3.3.7
Commit 8696c70af4 (parent 1ee0f8cc85)
33 changed files with 273 additions and 174 deletions
@@ -53,9 +53,9 @@
 #endif

 #define EIGEN_DEVICE_FUNC __host__ __device__
-// We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro
+// We need cuda_runtime.h to ensure that that EIGEN_USING_STD_MATH macro
 // works properly on the device side
-#include <math_functions.hpp>
+#include <cuda_runtime.h>
 #else
 #define EIGEN_DEVICE_FUNC
 #endif

@@ -305,7 +305,8 @@ template<> struct ldlt_inplace<Lower>
 if (size <= 1)
 {
 transpositions.setIdentity();
-if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
+if(size==0) sign = ZeroSign;
+else if (numext::real(mat.coeff(0,0)) > static_cast<RealScalar>(0) ) sign = PositiveSemiDef;
 else if (numext::real(mat.coeff(0,0)) < static_cast<RealScalar>(0)) sign = NegativeSemiDef;
 else sign = ZeroSign;
 return true;

@@ -153,8 +153,6 @@ class Array
 : Base(std::move(other))
 {
 Base::_check_template_params();
-if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
-Base::_set_noalias(other);
 }
 EIGEN_DEVICE_FUNC
 Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)

@@ -160,7 +160,7 @@ rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Deco
 {
 typedef typename Decomposition::RealScalar RealScalar;
 eigen_assert(dec.rows() == dec.cols());
-if (dec.rows() == 0) return RealScalar(1);
+if (dec.rows() == 0) return NumTraits<RealScalar>::infinity();
 if (matrix_norm == RealScalar(0)) return RealScalar(0);
 if (dec.rows() == 1) return RealScalar(1);
 const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec);

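This helper backs the rcond() members of the dense decompositions; the change above makes the degenerate 0x0 case report infinity instead of 1. A brief usage sketch (not part of the diff; the 4x4 random matrix is just an illustration):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
      Eigen::PartialPivLU<Eigen::MatrixXd> lu(A);
      // Reciprocal condition number estimate, roughly 1 / (||A||_1 * ||A^-1||_1);
      // values close to zero indicate an ill-conditioned matrix.
      std::cout << "rcond estimate: " << lu.rcond() << "\n";
      return 0;
    }
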
@@ -43,6 +43,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
 enum {
 RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
 ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+InnerStrideAtCompileTime = internal::traits<Derived>::InnerStrideAtCompileTime,
 SizeAtCompileTime = Base::SizeAtCompileTime
 };

@@ -187,8 +188,11 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
 void checkSanity(typename internal::enable_if<(internal::traits<T>::Alignment>0),void*>::type = 0) const
 {
 #if EIGEN_MAX_ALIGN_BYTES>0
+// innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible value:
+const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime);
+EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride);
 eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits<Derived>::Alignment) == 0)
-|| (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
+|| (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment ) && "data is not aligned");
 #endif
 }

@@ -616,21 +616,28 @@ template<typename Scalar>
 struct random_default_impl<Scalar, false, true>
 {
 static inline Scalar run(const Scalar& x, const Scalar& y)
-{
-typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX;
-if(y<x)
+{
+if (y <= x)
 return x;
-// the following difference might overflow on a 32 bits system,
-// but since y>=x the result converted to an unsigned long is still correct.
-std::size_t range = ScalarX(y)-ScalarX(x);
-std::size_t offset = 0;
-// rejection sampling
-std::size_t divisor = 1;
-std::size_t multiplier = 1;
-if(range<RAND_MAX) divisor = (std::size_t(RAND_MAX)+1)/(range+1);
-else multiplier = 1 + range/(std::size_t(RAND_MAX)+1);
+// ScalarU is the unsigned counterpart of Scalar, possibly Scalar itself.
+typedef typename make_unsigned<Scalar>::type ScalarU;
+// ScalarX is the widest of ScalarU and unsigned int.
+// We'll deal only with ScalarX and unsigned int below thus avoiding signed
+// types and arithmetic and signed overflows (which are undefined behavior).
+typedef typename conditional<(ScalarU(-1) > unsigned(-1)), ScalarU, unsigned>::type ScalarX;
+// The following difference doesn't overflow, provided our integer types are two's
+// complement and have the same number of padding bits in signed and unsigned variants.
+// This is the case in most modern implementations of C++.
+ScalarX range = ScalarX(y) - ScalarX(x);
+ScalarX offset = 0;
+ScalarX divisor = 1;
+ScalarX multiplier = 1;
+const unsigned rand_max = RAND_MAX;
+if (range <= rand_max) divisor = (rand_max + 1) / (range + 1);
+else multiplier = 1 + range / (rand_max + 1);
+// Rejection sampling.
 do {
-offset = (std::size_t(std::rand()) * multiplier) / divisor;
+offset = (unsigned(std::rand()) * multiplier) / divisor;
 } while (offset > range);
 return Scalar(ScalarX(x) + offset);
 }

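The rewritten generator above avoids signed overflow by mapping both endpoints into an unsigned type before subtracting, then rejection-samples std::rand(). A minimal standalone sketch of the same idea, assuming C++11 and a plain int range (this is an illustration, not Eigen's internal random_default_impl itself):

    #include <cstdlib>
    #include <type_traits>

    // Draw an integer uniformly from [x, y] without signed overflow:
    // the difference is computed in an unsigned type, and out-of-range
    // draws from std::rand() are rejected and retried.
    int random_in_range(int x, int y) {
      if (y <= x) return x;
      typedef std::make_unsigned<int>::type UInt;
      const UInt range = UInt(y) - UInt(x);   // well defined even if y - x overflows int
      const UInt rand_max = UInt(RAND_MAX);
      UInt divisor = 1, multiplier = 1;
      if (range <= rand_max) divisor = (rand_max + 1) / (range + 1);
      else                   multiplier = 1 + range / (rand_max + 1);
      UInt offset;
      do {
        offset = (UInt(std::rand()) * multiplier) / divisor;   // rejection sampling
      } while (offset > range);
      return int(UInt(x) + offset);
    }
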
@@ -1006,7 +1013,8 @@ inline int log2(int x)

 /** \returns the square root of \a x.
 *
-* It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
+* It is essentially equivalent to
+* \code using std::sqrt; return sqrt(x); \endcode
 * but slightly faster for float/double and some compilers (e.g., gcc), thanks to
 * specializations when SSE is enabled.
 *

@@ -274,8 +274,6 @@ class Matrix
 : Base(std::move(other))
 {
 Base::_check_template_params();
-if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic)
-Base::_set_noalias(other);
 }
 EIGEN_DEVICE_FUNC
 Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)

@@ -444,16 +444,24 @@ template<typename Derived> class MatrixBase
 ///////// MatrixFunctions module /////////

 typedef typename internal::stem_function<Scalar>::type StemFunction;
-const MatrixExponentialReturnValue<Derived> exp() const;
+#define EIGEN_MATRIX_FUNCTION(ReturnType, Name, Description) \
+/** \returns an expression of the matrix Description of \c *this. \brief This function requires the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>. To compute the coefficient-wise Description use ArrayBase::##Name . */ \
+const ReturnType<Derived> Name() const;
+#define EIGEN_MATRIX_FUNCTION_1(ReturnType, Name, Description, Argument) \
+/** \returns an expression of the matrix Description of \c *this. \brief This function requires the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>. To compute the coefficient-wise Description use ArrayBase::##Name . */ \
+const ReturnType<Derived> Name(Argument) const;
+
+EIGEN_MATRIX_FUNCTION(MatrixExponentialReturnValue, exp, exponential)
+/** \brief Helper function for the <a href="unsupported/group__MatrixFunctions__Module.html"> unsupported MatrixFunctions module</a>.*/
 const MatrixFunctionReturnValue<Derived> matrixFunction(StemFunction f) const;
-const MatrixFunctionReturnValue<Derived> cosh() const;
-const MatrixFunctionReturnValue<Derived> sinh() const;
-const MatrixFunctionReturnValue<Derived> cos() const;
-const MatrixFunctionReturnValue<Derived> sin() const;
-const MatrixSquareRootReturnValue<Derived> sqrt() const;
-const MatrixLogarithmReturnValue<Derived> log() const;
-const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;
-const MatrixComplexPowerReturnValue<Derived> pow(const std::complex<RealScalar>& p) const;
+EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cosh, hyperbolic cosine)
+EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sinh, hyperbolic sine)
+EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cos, cosine)
+EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sin, sine)
+EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root)
+EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm)
+EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue, pow, power to \c p, const RealScalar& p)
+EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const std::complex<RealScalar>& p)

 protected:
 EIGEN_DEVICE_FUNC MatrixBase() : Base() {}

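The EIGEN_MATRIX_FUNCTION macros only declare these members; their definitions still come from the unsupported MatrixFunctions module, which must be included explicitly. A short usage sketch (the 2x2 skew-symmetric matrix is just an example):

    #include <Eigen/Dense>
    #include <unsupported/Eigen/MatrixFunctions>
    #include <iostream>

    int main() {
      Eigen::Matrix2d A;
      A <<  0, 1,
           -1, 0;                       // skew-symmetric generator
      Eigen::Matrix2d E = A.exp();      // matrix exponential: a rotation by 1 radian
      Eigen::Matrix2d S = A.pow(2.0);   // matrix power, declared via EIGEN_MATRIX_FUNCTION_1
      std::cout << E << "\n\n" << S << "\n";
      return 0;
    }
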
@@ -169,6 +169,9 @@ void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<Ot
 OtherDerived& other = _other.const_cast_derived();
 eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
 eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
+// If solving for a 0x0 matrix, nothing to do, simply return.
+if (derived().cols() == 0)
+return;

 enum { copy = (internal::traits<OtherDerived>::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1};
 typedef typename internal::conditional<copy,

@@ -159,11 +159,12 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co

 #ifdef __FMA__
 template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
-#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
-// clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
-// and gcc stupidly generates a vfmadd132ps instruction,
-// so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
-// the result of the product.
+#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
+// Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
+// and even register spilling with clang>=6.0 (bug 1637).
+// Gcc stupidly generates a vfmadd132ps instruction.
+// So let's enforce it to generate a vfmadd231ps instruction since the most common use
+// case is to accumulate the result of the product.
 Packet8f res = c;
 __asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
 return res;
@@ -172,7 +173,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
 #endif
 }
 template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
-#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
+#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
 // see above
 Packet4d res = c;
 __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));

@@ -648,13 +648,13 @@ template<> EIGEN_STRONG_INLINE Packet8d preverse(const Packet8d& a)
 template<> EIGEN_STRONG_INLINE Packet16f pabs(const Packet16f& a)
 {
 // _mm512_abs_ps intrinsic not found, so hack around it
-return (__m512)_mm512_and_si512((__m512i)a, _mm512_set1_epi32(0x7fffffff));
+return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x7fffffff)));
 }
 template <>
 EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) {
 // _mm512_abs_ps intrinsic not found, so hack around it
-return (__m512d)_mm512_and_si512((__m512i)a,
-_mm512_set1_epi64(0x7fffffffffffffff));
+return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a),
+_mm512_set1_epi64(0x7fffffffffffffff)));
 }

 #ifdef EIGEN_VECTORIZE_AVX512DQ

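The new return statements express the same bit trick through the _mm512_cast* intrinsics instead of C-style casts: absolute value is taken by clearing the IEEE-754 sign bit. A scalar illustration of that idea (plain C++, not part of Eigen):

    #include <cstdint>
    #include <cstring>

    // Scalar analogue of the packet pabs() above: clear bit 31 (the sign bit)
    // and reinterpret the result as a float again.
    float abs_via_sign_mask(float x) {
      std::uint32_t bits;
      std::memcpy(&bits, &x, sizeof bits);   // well-defined type punning
      bits &= 0x7fffffffu;
      std::memcpy(&x, &bits, sizeof x);
      return x;
    }
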
@@ -29,7 +29,7 @@
 // type Eigen::half (inheriting from CUDA's __half struct) with
 // operator overloads such that it behaves basically as an arithmetic
 // type. It will be quite slow on CPUs (so it is recommended to stay
-// in fp32 for CPUs, except for simple parameter conversions, I/O
+// in float32_bits for CPUs, except for simple parameter conversions, I/O
 // to disk and the likes), but fast on GPUs.

@@ -50,38 +50,45 @@ struct half;
 namespace half_impl {

 #if !defined(EIGEN_HAS_CUDA_FP16)
-
-// Make our own __half definition that is similar to CUDA's.
-struct __half {
-EIGEN_DEVICE_FUNC __half() {}
-explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {}
+// Make our own __half_raw definition that is similar to CUDA's.
+struct __half_raw {
+EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
+explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
 unsigned short x;
 };
-
+#elif defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
+// In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
+typedef __half __half_raw;
 #endif

-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x);
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff);
-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h);

-struct half_base : public __half {
+struct half_base : public __half_raw {
 EIGEN_DEVICE_FUNC half_base() {}
-EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half(h) {}
-EIGEN_DEVICE_FUNC half_base(const __half& h) : __half(h) {}
+EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half_raw(h) {}
+EIGEN_DEVICE_FUNC half_base(const __half_raw& h) : __half_raw(h) {}
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
+EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
+#endif
 };

 } // namespace half_impl

 // Class definition.
 struct half : public half_impl::half_base {
-#if !defined(EIGEN_HAS_CUDA_FP16)
-typedef half_impl::__half __half;
+#if !defined(EIGEN_HAS_CUDA_FP16) || (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000)
+typedef half_impl::__half_raw __half_raw;
 #endif

 EIGEN_DEVICE_FUNC half() {}

-EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
+EIGEN_DEVICE_FUNC half(const __half_raw& h) : half_impl::half_base(h) {}
 EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {}
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
+EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {}
+#endif

 explicit EIGEN_DEVICE_FUNC half(bool b)
 : half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {}

@@ -138,12 +145,66 @@ struct half : public half_impl::half_base {
 }
 };

+} // end namespace Eigen
+
+namespace std {
+template<>
+struct numeric_limits<Eigen::half> {
+static const bool is_specialized = true;
+static const bool is_signed = true;
+static const bool is_integer = false;
+static const bool is_exact = false;
+static const bool has_infinity = true;
+static const bool has_quiet_NaN = true;
+static const bool has_signaling_NaN = true;
+static const float_denorm_style has_denorm = denorm_present;
+static const bool has_denorm_loss = false;
+static const std::float_round_style round_style = std::round_to_nearest;
+static const bool is_iec559 = false;
+static const bool is_bounded = false;
+static const bool is_modulo = false;
+static const int digits = 11;
+static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
+static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
+static const int radix = 2;
+static const int min_exponent = -13;
+static const int min_exponent10 = -4;
+static const int max_exponent = 16;
+static const int max_exponent10 = 4;
+static const bool traps = true;
+static const bool tinyness_before = false;
+
+static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
+static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
+static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
+static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
+static Eigen::half round_error() { return Eigen::half(0.5); }
+static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
+static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
+static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
+static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
+};
+
+// If std::numeric_limits<T> is specialized, should also specialize
+// std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
+// std::numeric_limits<const volatile T>
+// https://stackoverflow.com/a/16519653/
+template<>
+struct numeric_limits<const Eigen::half> : numeric_limits<Eigen::half> {};
+template<>
+struct numeric_limits<volatile Eigen::half> : numeric_limits<Eigen::half> {};
+template<>
+struct numeric_limits<const volatile Eigen::half> : numeric_limits<Eigen::half> {};
+} // end namespace std
+
+namespace Eigen {
+
 namespace half_impl {

-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530

 // Intrinsics for native fp16 support. Note that on current hardware,
-// these are no faster than fp32 arithmetic (you need to use the half2
+// these are no faster than float32_bits arithmetic (you need to use the half2
 // versions to get the ALU speed increased), but you do save the
 // conversion steps back and forth.

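With this specialization in place, generic code can query half-precision properties through std::numeric_limits as usual; a short sketch:

    #include <Eigen/Core>
    #include <iostream>
    #include <limits>

    int main() {
      typedef std::numeric_limits<Eigen::half> half_limits;
      std::cout << "digits: "  << half_limits::digits << "\n"
                << "epsilon: " << static_cast<float>(half_limits::epsilon()) << "\n"
                << "max: "     << static_cast<float>((half_limits::max)()) << "\n";
      return 0;
    }
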
@@ -202,7 +263,7 @@ EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
 #else // Emulate support for half floats

 // Definitions for CPUs and older CUDA, mostly working through conversion
-// to/from fp32.
+// to/from float32_bits.

 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
 return half(float(a) + float(b));

@@ -269,34 +330,35 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half operator / (const half& a, Index b) {
 // these in hardware. If we need more performance on older/other CPUs, they are
 // also possible to vectorize directly.

-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x) {
-__half h;
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x) {
+__half_raw h;
 h.x = x;
 return h;
 }

-union FP32 {
+union float32_bits {
 unsigned int u;
 float f;
 };

-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
-return __float2half(ff);
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw float_to_half_rtne(float ff) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
+__half tmp_ff = __float2half(ff);
+return *(__half_raw*)&tmp_ff;

 #elif defined(EIGEN_HAS_FP16_C)
-__half h;
+__half_raw h;
 h.x = _cvtss_sh(ff, 0);
 return h;

 #else
-FP32 f; f.f = ff;
+float32_bits f; f.f = ff;

-const FP32 f32infty = { 255 << 23 };
-const FP32 f16max = { (127 + 16) << 23 };
-const FP32 denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
+const float32_bits f32infty = { 255 << 23 };
+const float32_bits f16max = { (127 + 16) << 23 };
+const float32_bits denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
 unsigned int sign_mask = 0x80000000u;
-__half o;
+__half_raw o;
 o.x = static_cast<unsigned short>(0x0u);

 unsigned int sign = f.u & sign_mask;

@@ -335,17 +397,17 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff) {
 #endif
 }

-EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half_raw h) {
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
 return __half2float(h);

 #elif defined(EIGEN_HAS_FP16_C)
 return _cvtsh_ss(h.x);

 #else
-const FP32 magic = { 113 << 23 };
+const float32_bits magic = { 113 << 23 };
 const unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
-FP32 o;
+float32_bits o;

 o.u = (h.x & 0x7fff) << 13; // exponent/mantissa bits
 unsigned int exp = shifted_exp & o.u; // just the exponent

@@ -370,7 +432,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const half& a) {
 return (a.x & 0x7fff) == 0x7c00;
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const half& a) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
 return __hisnan(a);
 #else
 return (a.x & 0x7fff) > 0x7c00;

@@ -443,7 +505,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
 }

 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
 return __hlt(b, a) ? b : a;
 #else
 const float f1 = static_cast<float>(a);

@@ -452,7 +514,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
 #endif
 }
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (max)(const half& a, const half& b) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
 return __hlt(a, b) ? b : a;
 #else
 const float f1 = static_cast<float>(a);

@@ -490,49 +552,6 @@ template<> struct is_arithmetic<half> { enum { value = true }; };

 } // end namespace internal

-} // end namespace Eigen
-
-namespace std {
-template<>
-struct numeric_limits<Eigen::half> {
-static const bool is_specialized = true;
-static const bool is_signed = true;
-static const bool is_integer = false;
-static const bool is_exact = false;
-static const bool has_infinity = true;
-static const bool has_quiet_NaN = true;
-static const bool has_signaling_NaN = true;
-static const float_denorm_style has_denorm = denorm_present;
-static const bool has_denorm_loss = false;
-static const std::float_round_style round_style = std::round_to_nearest;
-static const bool is_iec559 = false;
-static const bool is_bounded = false;
-static const bool is_modulo = false;
-static const int digits = 11;
-static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
-static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
-static const int radix = 2;
-static const int min_exponent = -13;
-static const int min_exponent10 = -4;
-static const int max_exponent = 16;
-static const int max_exponent10 = 4;
-static const bool traps = true;
-static const bool tinyness_before = false;
-
-static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
-static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
-static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
-static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
-static Eigen::half round_error() { return Eigen::half(0.5); }
-static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
-static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
-static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
-static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
-};
-}
-
-namespace Eigen {
-
 template<> struct NumTraits<Eigen::half>
 : GenericNumTraits<Eigen::half>
 {

@@ -607,14 +626,18 @@ struct hash<Eigen::half> {


 // Add the missing shfl_xor intrinsic
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300
 __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneMask, int width=warpSize) {
+#if EIGEN_CUDACC_VER < 90000
 return static_cast<Eigen::half>(__shfl_xor(static_cast<float>(var), laneMask, width));
+#else
+return static_cast<Eigen::half>(__shfl_xor_sync(0xFFFFFFFF, static_cast<float>(var), laneMask, width));
+#endif
 }
 #endif

-// ldg() has an overload for __half, but we also need one for Eigen::half.
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350
+// ldg() has an overload for __half_raw, but we also need one for Eigen::half.
+#if defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 350
 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) {
 return Eigen::half_impl::raw_uint16_to_half(
 __ldg(reinterpret_cast<const unsigned short*>(ptr)));

@@ -622,7 +645,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr)
 #endif


-#if defined(__CUDA_ARCH__)
+#if defined(EIGEN_CUDA_ARCH)
 namespace Eigen {
 namespace numext {

@@ -99,7 +99,8 @@ template<> __device__ EIGEN_STRONG_INLINE Eigen::half pfirst<half2>(const half2&

 template<> __device__ EIGEN_STRONG_INLINE half2 pabs<half2>(const half2& a) {
 half2 result;
-result.x = a.x & 0x7FFF7FFF;
+unsigned temp = *(reinterpret_cast<const unsigned*>(&(a)));
+*(reinterpret_cast<unsigned*>(&(result))) = temp & 0x7FFF7FFF;
 return result;
 }

@@ -28,7 +28,7 @@ namespace internal {
 #endif
 #endif

-#if (defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)
+#if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX
 // With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
 // have overloads for both types without linking error.
 // One solution is to increase ABI version using -fabi-version=4 (or greater).

@@ -1197,10 +1197,16 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
 EIGEN_ASM_COMMENT("begin gebp micro kernel 2pX4");
 RhsPacket B_0, B1, B2, B3, T0;

-#define EIGEN_GEBGP_ONESTEP(K) \
+// NOTE: the begin/end asm comments below work around bug 935!
+// but they are not enough for gcc>=6 without FMA (bug 1637)
+#if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE)
+#define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND __asm__ ("" : [a0] "+x,m" (A0),[a1] "+x,m" (A1));
+#else
+#define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND
+#endif
+#define EIGEN_GEBGP_ONESTEP(K) \
 do { \
 EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \
-EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
 traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
 traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
 traits.broadcastRhs(&blB[(0+4*K)*RhsProgress], B_0, B1, B2, B3); \
@@ -1212,6 +1218,7 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
 traits.madd(A1, B2, C6, B2); \
 traits.madd(A0, B3, C3, T0); \
 traits.madd(A1, B3, C7, B3); \
+EIGEN_GEBP_2PX4_SPILLING_WORKAROUND \
 EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX4"); \
 } while(false)

@@ -1526,10 +1533,10 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
 // The following piece of code wont work for 512 bit registers
 // Moreover, if LhsProgress==8 it assumes that there is a half packet of the same size
 // as nr (which is currently 4) for the return type.
-typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
+const int SResPacketHalfSize = unpacket_traits<typename unpacket_traits<SResPacket>::half>::size;
 if ((SwappedTraits::LhsProgress % 4) == 0 &&
 (SwappedTraits::LhsProgress <= 8) &&
-(SwappedTraits::LhsProgress!=8 || unpacket_traits<SResPacketHalf>::size==nr))
+(SwappedTraits::LhsProgress!=8 || SResPacketHalfSize==nr))
 {
 SAccPacket C0, C1, C2, C3;
 straits.initAcc(C0);

@@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con
 static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
 const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
 { \
-if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \
+if ( lhs==rhs && ((UpLo&(Lower|Upper))==UpLo) ) { \
 general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
 ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
 } else { \

@@ -43,12 +43,20 @@
 #endif
 #pragma clang diagnostic ignored "-Wconstant-logical-operand"

-#elif defined __GNUC__ && __GNUC__>=6
+#elif defined __GNUC__

-#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
+#if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
 #pragma GCC diagnostic push
 #endif
-#pragma GCC diagnostic ignored "-Wignored-attributes"
+// g++ warns about local variables shadowing member functions, which is too strict
+#pragma GCC diagnostic ignored "-Wshadow"
+#if __GNUC__ == 4 && __GNUC_MINOR__ < 8
+// Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions:
+#pragma GCC diagnostic ignored "-Wtype-limits"
+#endif
+#if __GNUC__>=6
+#pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif

 #endif

@@ -13,7 +13,7 @@

 #define EIGEN_WORLD_VERSION 3
 #define EIGEN_MAJOR_VERSION 3
-#define EIGEN_MINOR_VERSION 5
+#define EIGEN_MINOR_VERSION 7

 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
 (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \

@@ -747,7 +747,15 @@ public:
 pointer allocate(size_type num, const void* /*hint*/ = 0)
 {
 internal::check_size_for_overflow<T>(num);
-return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
+size_type size = num * sizeof(T);
+#if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
+// workaround gcc bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
+// It triggered eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
+if(size>=std::size_t((std::numeric_limits<std::ptrdiff_t>::max)()))
+return 0;
+else
+#endif
+return static_cast<pointer>( internal::aligned_malloc(size) );
 }

 void deallocate(pointer p, size_type /*num*/)

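Eigen::aligned_allocator is the allocator users plug into STL containers of fixed-size vectorizable Eigen types, so allocate() sits on a fairly common path; a short usage sketch:

    #include <Eigen/Core>
    #include <vector>

    int main() {
      // The allocator whose allocate() member is patched above.
      std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> > pts;
      pts.push_back(Eigen::Vector4f::Zero());
      pts.push_back(Eigen::Vector4f::Ones());
      return (int)pts.size() - 2;   // 0 on success
    }
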
@@ -109,6 +109,28 @@ template<> struct is_integral<unsigned int> { enum { value = true }; };
 template<> struct is_integral<signed long> { enum { value = true }; };
 template<> struct is_integral<unsigned long> { enum { value = true }; };

+#if EIGEN_HAS_CXX11
+using std::make_unsigned;
+#else
+// TODO: Possibly improve this implementation of make_unsigned.
+// It is currently used only by
+// template<typename Scalar> struct random_default_impl<Scalar, false, true>.
+template<typename> struct make_unsigned;
+template<> struct make_unsigned<char> { typedef unsigned char type; };
+template<> struct make_unsigned<signed char> { typedef unsigned char type; };
+template<> struct make_unsigned<unsigned char> { typedef unsigned char type; };
+template<> struct make_unsigned<signed short> { typedef unsigned short type; };
+template<> struct make_unsigned<unsigned short> { typedef unsigned short type; };
+template<> struct make_unsigned<signed int> { typedef unsigned int type; };
+template<> struct make_unsigned<unsigned int> { typedef unsigned int type; };
+template<> struct make_unsigned<signed long> { typedef unsigned long type; };
+template<> struct make_unsigned<unsigned long> { typedef unsigned long type; };
+#if EIGEN_COMP_MSVC
+template<> struct make_unsigned<signed __int64> { typedef unsigned __int64 type; };
+template<> struct make_unsigned<unsigned __int64> { typedef unsigned __int64 type; };
+#endif
+#endif
+
 template <typename T> struct add_const { typedef const T type; };
 template <typename T> struct add_const<T&> { typedef T& type; };

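A minimal compile-time check of the backported trait (static_assert is used here purely for illustration; inside Eigen the hand-written branch is only compiled when EIGEN_HAS_CXX11 is 0, and internal:: is not public API):

    #include <Eigen/Core>
    #include <type_traits>

    // Eigen::internal::make_unsigned mirrors std::make_unsigned for the
    // integer types that random_default_impl needs.
    static_assert(std::is_same<Eigen::internal::make_unsigned<int>::type,
                               unsigned int>::value,
                  "make_unsigned<int> should be unsigned int");
    static_assert(std::is_same<Eigen::internal::make_unsigned<signed long>::type,
                               unsigned long>::value,
                  "make_unsigned<long> should be unsigned long");

    int main() { return 0; }
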
@@ -8,7 +8,7 @@
 #pragma warning pop
 #elif defined __clang__
 #pragma clang diagnostic pop
-#elif defined __GNUC__ && __GNUC__>=6
+#elif defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
 #pragma GCC diagnostic pop
 #endif

@@ -66,7 +66,6 @@ template<typename Derived>
 inline typename MatrixBase<Derived>::EigenvaluesReturnType
 MatrixBase<Derived>::eigenvalues() const
 {
-typedef typename internal::traits<Derived>::Scalar Scalar;
 return internal::eigenvalues_selector<Derived, NumTraits<Scalar>::IsComplex>::run(derived());
 }

@@ -88,7 +87,6 @@ template<typename MatrixType, unsigned int UpLo>
 inline typename SelfAdjointView<MatrixType, UpLo>::EigenvaluesReturnType
 SelfAdjointView<MatrixType, UpLo>::eigenvalues() const
 {
-typedef typename SelfAdjointView<MatrixType, UpLo>::PlainObject PlainObject;
 PlainObject thisAsMatrix(*this);
 return SelfAdjointEigenSolver<PlainObject>(thisAsMatrix, false).eigenvalues();
 }

@@ -50,7 +50,8 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x,
 tol_error = 0;
 return;
 }
-RealScalar threshold = tol*tol*rhsNorm2;
+const RealScalar considerAsZero = (std::numeric_limits<RealScalar>::min)();
+RealScalar threshold = numext::maxi(tol*tol*rhsNorm2,considerAsZero);
 RealScalar residualNorm2 = residual.squaredNorm();
 if (residualNorm2 < threshold)
 {
@@ -58,7 +59,7 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x,
 tol_error = sqrt(residualNorm2 / rhsNorm2);
 return;
 }
-
+
 VectorType p(n);
 p = precond.solve(residual); // initial search direction

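The clamp above puts a positive floor under the stopping threshold when tol*tol*rhsNorm2 would underflow for very small right-hand sides. A short usage sketch of the public solver built on this routine:

    #include <Eigen/Sparse>
    #include <iostream>

    int main() {
      typedef Eigen::SparseMatrix<double> SpMat;
      SpMat A(2, 2);
      A.insert(0, 0) = 4.0;
      A.insert(1, 1) = 3.0;            // a trivially SPD (diagonal) matrix
      A.makeCompressed();
      Eigen::VectorXd b(2);
      b << 1.0, 2.0;

      Eigen::ConjugateGradient<SpMat, Eigen::Lower | Eigen::Upper> cg;
      cg.setTolerance(1e-10);          // "tol" in the code above, squared and scaled by ||b||^2
      cg.compute(A);
      Eigen::VectorXd x = cg.solve(b);
      std::cout << "iterations: " << cg.iterations()
                << "  estimated error: " << cg.error() << "\n";
      return 0;
    }
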
@@ -65,11 +65,11 @@ template<typename Scalar> class JacobiRotation
 bool makeJacobi(const MatrixBase<Derived>&, Index p, Index q);
 bool makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z);

-void makeGivens(const Scalar& p, const Scalar& q, Scalar* z=0);
+void makeGivens(const Scalar& p, const Scalar& q, Scalar* r=0);

 protected:
-void makeGivens(const Scalar& p, const Scalar& q, Scalar* z, internal::true_type);
-void makeGivens(const Scalar& p, const Scalar& q, Scalar* z, internal::false_type);
+void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::true_type);
+void makeGivens(const Scalar& p, const Scalar& q, Scalar* r, internal::false_type);

 Scalar m_c, m_s;
 };
@@ -84,7 +84,6 @@ bool JacobiRotation<Scalar>::makeJacobi(const RealScalar& x, const Scalar& y, co
 {
 using std::sqrt;
 using std::abs;
-typedef typename NumTraits<Scalar>::Real RealScalar;
 RealScalar deno = RealScalar(2)*abs(y);
 if(deno < (std::numeric_limits<RealScalar>::min)())
 {
@@ -133,7 +132,7 @@ inline bool JacobiRotation<Scalar>::makeJacobi(const MatrixBase<Derived>& m, Ind
 * \f$ V = \left ( \begin{array}{c} p \\ q \end{array} \right )\f$ yields:
 * \f$ G^* V = \left ( \begin{array}{c} r \\ 0 \end{array} \right )\f$.
 *
-* The value of \a z is returned if \a z is not null (the default is null).
+* The value of \a r is returned if \a r is not null (the default is null).
 * Also note that G is built such that the cosine is always real.
 *
 * Example: \include Jacobi_makeGivens.cpp
@@ -146,9 +145,9 @@ inline bool JacobiRotation<Scalar>::makeJacobi(const MatrixBase<Derived>& m, Ind
 * \sa MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
 */
 template<typename Scalar>
-void JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* z)
+void JacobiRotation<Scalar>::makeGivens(const Scalar& p, const Scalar& q, Scalar* r)
 {
-makeGivens(p, q, z, typename internal::conditional<NumTraits<Scalar>::IsComplex, internal::true_type, internal::false_type>::type());
+makeGivens(p, q, r, typename internal::conditional<NumTraits<Scalar>::IsComplex, internal::true_type, internal::false_type>::type());
 }

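The parameter rename from z to r matches the documentation above (applying G to (p, q) yields (r, 0)). A small usage sketch, following the documented Jacobi_makeGivens example:

    #include <Eigen/Dense>
    #include <Eigen/Jacobi>
    #include <iostream>

    int main() {
      Eigen::Vector2d v(3.0, 4.0);
      Eigen::JacobiRotation<double> G;
      double r;
      G.makeGivens(v.x(), v.y(), &r);       // r receives the value such that G^* v = (r, 0)
      v.applyOnTheLeft(0, 1, G.adjoint());  // second entry is now (numerically) zero
      std::cout << "r = " << r << ", v = " << v.transpose() << "\n";
      return 0;
    }
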
@@ -180,8 +180,10 @@ public:
 RealScalar threshold() const
 {
 eigen_assert(m_isInitialized || m_usePrescribedThreshold);
+// this temporary is needed to workaround a MSVC issue
+Index diagSize = (std::max<Index>)(1,m_diagSize);
 return m_usePrescribedThreshold ? m_prescribedThreshold
-: (std::max<Index>)(1,m_diagSize)*NumTraits<Scalar>::epsilon();
+: diagSize*NumTraits<Scalar>::epsilon();
 }

 /** \returns true if \a U (full or thin) is asked for in this SVD decomposition */

@@ -893,7 +893,7 @@ public:

 Index p = m_outerIndex[outer] + m_innerNonZeros[outer]++;
 m_data.index(p) = convert_index(inner);
-return (m_data.value(p) = 0);
+return (m_data.value(p) = Scalar(0));
 }

 private:
@@ -1274,7 +1274,7 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar&
 m_innerNonZeros[outer]++;

 m_data.index(p) = inner;
-return (m_data.value(p) = 0);
+return (m_data.value(p) = Scalar(0));
 }

 template<typename _Scalar, int _Options, typename _StorageIndex>
@@ -1381,7 +1381,7 @@ EIGEN_DONT_INLINE typename SparseMatrix<_Scalar,_Options,_StorageIndex>::Scalar&
 }

 m_data.index(p) = inner;
-return (m_data.value(p) = 0);
+return (m_data.value(p) = Scalar(0));
 }

 namespace internal {

@@ -499,8 +499,6 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
 eigen_assert(m_analysisIsOk && "analyzePattern() should be called first");
 eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices");

-typedef typename IndexVector::Scalar StorageIndex;
-
 m_isInitialized = true;

@@ -146,7 +146,7 @@ void SparseLUImpl<Scalar,StorageIndex>::panel_bmod(const Index m, const Index w,

 Index ldl = internal::first_multiple<Index>(nrow, PacketSize);
 Index offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize)) % PacketSize;
-auto L = MappedMatrixBlock(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
+MappedMatrixBlock L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));

 L.setZero();
 internal::sparselu_gemm<Scalar>(L.rows(), L.cols(), B.cols(), B.data(), B.outerStride(), U.data(), U.outerStride(), L.data(), L.outerStride());

@@ -297,8 +297,8 @@ SluMatrix asSluMatrix(MatrixType& mat)
 template<typename Scalar, int Flags, typename Index>
 MappedSparseMatrix<Scalar,Flags,Index> map_superlu(SluMatrix& sluMat)
 {
-eigen_assert((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR
-|| (Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC);
+eigen_assert(((Flags&RowMajor)==RowMajor && sluMat.Stype == SLU_NR)
+|| ((Flags&ColMajor)==ColMajor && sluMat.Stype == SLU_NC));

 Index outerSize = (Flags&RowMajor)==RowMajor ? sluMat.ncol : sluMat.nrow;

@@ -1,7 +1,6 @@
-THIS IS NOT THE COMPLETE EIGEN DISTRIBUTION. ONLY FILES NEEDED FOR COMPILING EIGEN INTO SLIC3R WERE PUT INTO THE SLIC3R SOURCE DISTRIBUTION.
-THIS DIRECTORY CONTAINS PIECES OF THE EIGEN 3.3.5 b3f3d4950030 SOURCE DISTRIBUTION.
-
 **Eigen is a C++ template library for linear algebra: matrices, vectors, numerical solvers, and related algorithms.**

 For more information go to http://eigen.tuxfamily.org/.

+THIS IS NOT THE COMPLETE EIGEN DISTRIBUTION. ONLY FILES NEEDED FOR COMPILING EIGEN INTO SLIC3R WERE PUT INTO THE SLIC3R SOURCE DISTRIBUTION.
+THIS DIRECTORY CONTAINS PIECES OF THE EIGEN 3.3.7 323c052e1731 SOURCE DISTRIBUTION.

@@ -931,7 +931,7 @@ class BlockSparseMatrix : public SparseMatrixBase<BlockSparseMatrix<_Scalar,_Blo
 }

 /**
-* \returns the starting position of the block <id> in the array of values
+* \returns the starting position of the block \p id in the array of values
 */
 Index blockPtr(Index id) const
 {

@@ -228,6 +228,9 @@ template<typename _Scalar, int _Options, typename _StorageIndex>
 EIGEN_DEPRECATED inline DynamicSparseMatrix()
 : m_innerSize(0), m_data(0)
 {
+#ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+#endif
 eigen_assert(innerSize()==0 && outerSize()==0);
 }

@@ -235,6 +238,9 @@ template<typename _Scalar, int _Options, typename _StorageIndex>
 EIGEN_DEPRECATED inline DynamicSparseMatrix(Index rows, Index cols)
 : m_innerSize(0)
 {
+#ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+#endif
 resize(rows, cols);
 }

@@ -243,12 +249,18 @@ template<typename _Scalar, int _Options, typename _StorageIndex>
 EIGEN_DEPRECATED explicit inline DynamicSparseMatrix(const SparseMatrixBase<OtherDerived>& other)
 : m_innerSize(0)
 {
-Base::operator=(other.derived());
+#ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+#endif
+Base::operator=(other.derived());
 }

 inline DynamicSparseMatrix(const DynamicSparseMatrix& other)
 : Base(), m_innerSize(0)
 {
+#ifdef EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN
+#endif
 *this = other.derived();
 }

@@ -17,8 +17,8 @@ namespace Eigen {

 namespace internal
 {
-template <typename Scalar>
-inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, Scalar& value)
+template <typename Scalar,typename IndexType>
+inline bool GetMarketLine (std::stringstream& line, IndexType& M, IndexType& N, IndexType& i, IndexType& j, Scalar& value)
 {
 line >> i >> j >> value;
 i--;
@@ -30,8 +30,8 @@ namespace internal
 else
 return false;
 }
-template <typename Scalar>
-inline bool GetMarketLine (std::stringstream& line, Index& M, Index& N, Index& i, Index& j, std::complex<Scalar>& value)
+template <typename Scalar,typename IndexType>
+inline bool GetMarketLine (std::stringstream& line, IndexType& M, IndexType& N, IndexType& i, IndexType& j, std::complex<Scalar>& value)
 {
 Scalar valR, valI;
 line >> i >> j >> valR >> valI;
@@ -134,7 +134,7 @@ template<typename SparseMatrixType>
 bool loadMarket(SparseMatrixType& mat, const std::string& filename)
 {
 typedef typename SparseMatrixType::Scalar Scalar;
-typedef typename SparseMatrixType::Index Index;
+typedef typename SparseMatrixType::StorageIndex StorageIndex;
 std::ifstream input(filename.c_str(),std::ios::in);
 if(!input)
 return false;
@@ -144,11 +144,11 @@ bool loadMarket(SparseMatrixType& mat, const std::string& filename)

 bool readsizes = false;

-typedef Triplet<Scalar,Index> T;
+typedef Triplet<Scalar,StorageIndex> T;
 std::vector<T> elements;

-Index M(-1), N(-1), NNZ(-1);
-Index count = 0;
+StorageIndex M(-1), N(-1), NNZ(-1);
+StorageIndex count = 0;
 while(input.getline(buffer, maxBuffersize))
 {
 // skip comments
@@ -171,7 +171,7 @@ bool loadMarket(SparseMatrixType& mat, const std::string& filename)
 }
 else
 {
-Index i(-1), j(-1);
+StorageIndex i(-1), j(-1);
 Scalar value;
 if( internal::GetMarketLine(line, M, N, i, j, value) )
 {

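These GetMarketLine helpers sit behind the public loadMarket()/saveMarket() functions of the unsupported SparseExtra module; after the change they parse indices with the matrix's StorageIndex type. A short usage sketch ("matrix.mtx" is a placeholder path):

    #include <Eigen/SparseCore>
    #include <unsupported/Eigen/SparseExtra>
    #include <iostream>

    int main() {
      Eigen::SparseMatrix<double> A;
      // Read a Matrix Market file into A ("matrix.mtx" is a placeholder).
      if (!Eigen::loadMarket(A, "matrix.mtx")) {
        std::cerr << "could not read matrix.mtx\n";
        return 1;
      }
      std::cout << A.rows() << "x" << A.cols()
                << " matrix with " << A.nonZeros() << " non-zeros\n";
      return 0;
    }
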