Merge remote-tracking branch 'origin/eigenize' into sla_base_pool

This commit is contained in:
tamasmeszaros 2018-08-21 13:50:09 +02:00
commit 6e77307880
188 changed files with 3351 additions and 3305 deletions

View file

@ -38,7 +38,6 @@ if ($gui) {
%prereqs = qw( %prereqs = qw(
Class::Accessor 0 Class::Accessor 0
Wx 0.9918 Wx 0.9918
Socket 2.016
); );
%recommends = qw( %recommends = qw(
Wx::GLCanvas 0 Wx::GLCanvas 0

View file

@ -1,5 +1,4 @@
# Boost 1.63 requires CMake 3.7 or newer cmake_minimum_required(VERSION 3.2)
cmake_minimum_required(VERSION 2.8)
project(Slic3r) project(Slic3r)
@ -23,6 +22,7 @@ option(SLIC3R_GUI "Compile Slic3r with GUI components (OpenGL, wxWidgets)"
option(SLIC3R_PRUSACONTROL "Compile Slic3r with the PrusaControl prject file format (requires wxWidgets base library)" 1) option(SLIC3R_PRUSACONTROL "Compile Slic3r with the PrusaControl prject file format (requires wxWidgets base library)" 1)
option(SLIC3R_PROFILE "Compile Slic3r with an invasive Shiny profiler" 0) option(SLIC3R_PROFILE "Compile Slic3r with an invasive Shiny profiler" 0)
option(SLIC3R_MSVC_COMPILE_PARALLEL "Compile on Visual Studio in parallel" 1) option(SLIC3R_MSVC_COMPILE_PARALLEL "Compile on Visual Studio in parallel" 1)
option(SLIC3R_MSVC_PDB "Generate PDB files on MSVC in Release mode" 1)
if (MSVC AND SLIC3R_MSVC_COMPILE_PARALLEL) if (MSVC AND SLIC3R_MSVC_COMPILE_PARALLEL)
add_compile_options(/MP) add_compile_options(/MP)
@ -36,10 +36,18 @@ else()
set(ENV_PATH_SEPARATOR ":") set(ENV_PATH_SEPARATOR ":")
endif() endif()
set(ENV{PATH} "${PROJECT_SOURCE_DIR}/local-lib/bin${ENV_PATH_SEPARATOR}$ENV{PATH}") set(ENV{PATH} "${PROJECT_SOURCE_DIR}/local-lib/bin${ENV_PATH_SEPARATOR}$ENV{PATH}")
set(ENV{PERL5LIB} "${PROJECT_SOURCE_DIR}/local-lib/lib/perl${ENV_PATH_SEPARATOR}$ENV{PERL5LIB}") set(PERL_INCLUDE "${PROJECT_SOURCE_DIR}/local-lib/lib/perl5${ENV_PATH_SEPARATOR}$ENV{PERL5LIB}")
message("PATH: $ENV{PATH}") message("PATH: $ENV{PATH}")
message("PERL5LIB: $ENV{PERL5LIB}") message("PERL_INCLUDE: ${PERL_INCLUDE}")
find_package(Perl REQUIRED) find_package(Perl REQUIRED)
if (WIN32)
# On Windows passing the PERL5LIB variable causes various problems (such as with MAX_PATH and others),
# basically I've found no good way to do it on Windows.
set(PERL5LIB_ENV_CMD "")
else()
set(PERL5LIB_ENV_CMD ${CMAKE_COMMAND} -E env PERL5LIB=${PERL_INCLUDE})
endif()
# CMAKE_PREFIX_PATH is used to point CMake to the remaining dependencies (Boost, TBB, ...) # CMAKE_PREFIX_PATH is used to point CMake to the remaining dependencies (Boost, TBB, ...)
# We pick it from environment if it is not defined in another way # We pick it from environment if it is not defined in another way

View file

@ -7,18 +7,6 @@ use warnings;
use List::Util qw(first); use List::Util qw(first);
use Slic3r::Geometry::Clipper qw(union_ex diff_pl); use Slic3r::Geometry::Clipper qw(union_ex diff_pl);
sub wkt {
my $self = shift;
return sprintf "POLYGON(%s)",
join ',', map "($_)", map { join ',', map "$_->[0] $_->[1]", @$_ } @$self;
}
sub dump_perl {
my $self = shift;
return sprintf "[%s]",
join ',', map "[$_]", map { join ',', map "[$_->[0],$_->[1]]", @$_ } @$self;
}
sub offset { sub offset {
my $self = shift; my $self = shift;
return Slic3r::Geometry::Clipper::offset(\@$self, @_); return Slic3r::Geometry::Clipper::offset(\@$self, @_);

View file

@ -1,4 +1,5 @@
# Bed shape dialog # Bed shape dialog
# still used by the Slic3r::GUI::Controller::ManualControlDialog Perl module.
package Slic3r::GUI::2DBed; package Slic3r::GUI::2DBed;
use strict; use strict;

View file

@ -7,11 +7,6 @@ sub new_scale {
return $class->new(map Slic3r::Geometry::scale($_), @_); return $class->new(map Slic3r::Geometry::scale($_), @_);
} }
sub dump_perl {
my $self = shift;
return sprintf "[%s,%s]", @$self;
}
package Slic3r::Pointf; package Slic3r::Pointf;
use strict; use strict;
use warnings; use warnings;

View file

@ -10,9 +10,4 @@ sub new_scale {
return $class->new(map [ Slic3r::Geometry::scale($_->[X]), Slic3r::Geometry::scale($_->[Y]) ], @points); return $class->new(map [ Slic3r::Geometry::scale($_->[X]), Slic3r::Geometry::scale($_->[Y]) ], @points);
} }
sub dump_perl {
my $self = shift;
return sprintf "[%s]", join ',', map "[$_->[0],$_->[1]]", @$self;
}
1; 1;

View file

@ -420,7 +420,7 @@ set(MyTypemap ${CMAKE_CURRENT_BINARY_DIR}/typemap)
add_custom_command( add_custom_command(
OUTPUT ${MyTypemap} OUTPUT ${MyTypemap}
DEPENDS ${CMAKE_CURRENT_LIST_DIR}/xsp/my.map DEPENDS ${CMAKE_CURRENT_LIST_DIR}/xsp/my.map
COMMAND ${PERL_EXECUTABLE} -MExtUtils::Typemaps -MExtUtils::Typemaps::Basic -e "$typemap = ExtUtils::Typemaps->new(file => \"${CMAKE_CURRENT_LIST_DIR}/xsp/my.map\"); $typemap->merge(typemap => ExtUtils::Typemaps::Basic->new); $typemap->write(file => \"${MyTypemap}\")" COMMAND ${PERL5LIB_ENV_CMD} ${PERL_EXECUTABLE} -MExtUtils::Typemaps -MExtUtils::Typemaps::Basic -e "$typemap = ExtUtils::Typemaps->new(file => \"${CMAKE_CURRENT_LIST_DIR}/xsp/my.map\"); $typemap->merge(typemap => ExtUtils::Typemaps::Basic->new); $typemap->write(file => \"${MyTypemap}\")"
VERBATIM VERBATIM
) )
@ -485,7 +485,8 @@ set(XS_MAIN_CPP ${CMAKE_CURRENT_BINARY_DIR}/XS.cpp)
add_custom_command( add_custom_command(
OUTPUT ${XS_MAIN_CPP} OUTPUT ${XS_MAIN_CPP}
DEPENDS ${MyTypemap} ${XS_XSP_FILES} ${CMAKE_CURRENT_LIST_DIR}/xsp/typemap.xspt DEPENDS ${MyTypemap} ${XS_XSP_FILES} ${CMAKE_CURRENT_LIST_DIR}/xsp/typemap.xspt
COMMAND COMMAND xsubpp -typemap typemap -output ${XS_MAIN_CPP} -hiertype ${XS_MAIN_XS} COMMAND ${PERL5LIB_ENV_CMD} xsubpp -typemap typemap -output ${XS_MAIN_CPP} -hiertype ${XS_MAIN_XS}
VERBATIM
) )
# Define the Perl XS shared library. # Define the Perl XS shared library.
@ -549,6 +550,14 @@ if (WIN32)
target_compile_definitions(XS PRIVATE -DNOGDI -DNOMINMAX -DHAS_BOOL) target_compile_definitions(XS PRIVATE -DNOGDI -DNOMINMAX -DHAS_BOOL)
endif () endif ()
# SLIC3R_MSVC_PDB
if (MSVC AND SLIC3R_MSVC_PDB AND ${CMAKE_BUILD_TYPE} STREQUAL "Release")
set_target_properties(XS PROPERTIES
COMPILE_FLAGS "/Zi"
LINK_FLAGS "/DEBUG /OPT:REF /OPT:ICF"
)
endif()
## Configuration flags ## Configuration flags
if (SLIC3R_GUI) if (SLIC3R_GUI)
message("Slic3r will be built with GUI support") message("Slic3r will be built with GUI support")

View file

@ -9,6 +9,7 @@
#define EIGEN_CHOLESKY_MODULE_H #define EIGEN_CHOLESKY_MODULE_H
#include "Core" #include "Core"
#include "Jacobi"
#include "src/Core/util/DisableStupidWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
@ -31,7 +32,11 @@
#include "src/Cholesky/LLT.h" #include "src/Cholesky/LLT.h"
#include "src/Cholesky/LDLT.h" #include "src/Cholesky/LDLT.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h" #include "src/misc/lapacke.h"
#endif
#include "src/Cholesky/LLT_LAPACKE.h" #include "src/Cholesky/LLT_LAPACKE.h"
#endif #endif

View file

@ -14,6 +14,22 @@
// first thing Eigen does: stop the compiler from committing suicide // first thing Eigen does: stop the compiler from committing suicide
#include "src/Core/util/DisableStupidWarnings.h" #include "src/Core/util/DisableStupidWarnings.h"
#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA)
#define EIGEN_CUDACC __CUDACC__
#endif
#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA)
#define EIGEN_CUDA_ARCH __CUDA_ARCH__
#endif
#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
#define EIGEN_CUDACC_VER ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100))
#elif defined(__CUDACC_VER__)
#define EIGEN_CUDACC_VER __CUDACC_VER__
#else
#define EIGEN_CUDACC_VER 0
#endif
// Handle NVCC/CUDA/SYCL // Handle NVCC/CUDA/SYCL
#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__) #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__)
// Do not try asserts on CUDA and SYCL! // Do not try asserts on CUDA and SYCL!
@ -155,6 +171,9 @@
#ifdef __AVX512DQ__ #ifdef __AVX512DQ__
#define EIGEN_VECTORIZE_AVX512DQ #define EIGEN_VECTORIZE_AVX512DQ
#endif #endif
#ifdef __AVX512ER__
#define EIGEN_VECTORIZE_AVX512ER
#endif
#endif #endif
// include files // include files
@ -229,7 +248,7 @@
#if defined __CUDACC__ #if defined __CUDACC__
#define EIGEN_VECTORIZE_CUDA #define EIGEN_VECTORIZE_CUDA
#include <vector_types.h> #include <vector_types.h>
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 #if EIGEN_CUDACC_VER >= 70500
#define EIGEN_HAS_CUDA_FP16 #define EIGEN_HAS_CUDA_FP16
#endif #endif
#endif #endif
@ -352,6 +371,7 @@ using std::ptrdiff_t;
#include "src/Core/MathFunctions.h" #include "src/Core/MathFunctions.h"
#include "src/Core/GenericPacketMath.h" #include "src/Core/GenericPacketMath.h"
#include "src/Core/MathFunctionsImpl.h" #include "src/Core/MathFunctionsImpl.h"
#include "src/Core/arch/Default/ConjHelper.h"
#if defined EIGEN_VECTORIZE_AVX512 #if defined EIGEN_VECTORIZE_AVX512
#include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/PacketMath.h"
@ -367,6 +387,7 @@ using std::ptrdiff_t;
#include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX/MathFunctions.h"
#include "src/Core/arch/AVX/Complex.h" #include "src/Core/arch/AVX/Complex.h"
#include "src/Core/arch/AVX/TypeCasting.h" #include "src/Core/arch/AVX/TypeCasting.h"
#include "src/Core/arch/SSE/TypeCasting.h"
#elif defined EIGEN_VECTORIZE_SSE #elif defined EIGEN_VECTORIZE_SSE
#include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/PacketMath.h"
#include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/SSE/MathFunctions.h"

View file

@ -45,7 +45,11 @@
#include "src/Eigenvalues/GeneralizedEigenSolver.h" #include "src/Eigenvalues/GeneralizedEigenSolver.h"
#include "src/Eigenvalues/MatrixBaseEigenvalues.h" #include "src/Eigenvalues/MatrixBaseEigenvalues.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h" #include "src/misc/lapacke.h"
#endif
#include "src/Eigenvalues/RealSchur_LAPACKE.h" #include "src/Eigenvalues/RealSchur_LAPACKE.h"
#include "src/Eigenvalues/ComplexSchur_LAPACKE.h" #include "src/Eigenvalues/ComplexSchur_LAPACKE.h"
#include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h" #include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h"

View file

@ -28,7 +28,11 @@
#include "src/LU/FullPivLU.h" #include "src/LU/FullPivLU.h"
#include "src/LU/PartialPivLU.h" #include "src/LU/PartialPivLU.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h" #include "src/misc/lapacke.h"
#endif
#include "src/LU/PartialPivLU_LAPACKE.h" #include "src/LU/PartialPivLU_LAPACKE.h"
#endif #endif
#include "src/LU/Determinant.h" #include "src/LU/Determinant.h"

View file

@ -36,7 +36,11 @@
#include "src/QR/ColPivHouseholderQR.h" #include "src/QR/ColPivHouseholderQR.h"
#include "src/QR/CompleteOrthogonalDecomposition.h" #include "src/QR/CompleteOrthogonalDecomposition.h"
#ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_LAPACKE
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h" #include "src/misc/lapacke.h"
#endif
#include "src/QR/HouseholderQR_LAPACKE.h" #include "src/QR/HouseholderQR_LAPACKE.h"
#include "src/QR/ColPivHouseholderQR_LAPACKE.h" #include "src/QR/ColPivHouseholderQR_LAPACKE.h"
#endif #endif

View file

@ -27,7 +27,7 @@ void qFree(void *ptr)
void *qRealloc(void *ptr, std::size_t size) void *qRealloc(void *ptr, std::size_t size)
{ {
void* newPtr = Eigen::internal::aligned_malloc(size); void* newPtr = Eigen::internal::aligned_malloc(size);
memcpy(newPtr, ptr, size); std::memcpy(newPtr, ptr, size);
Eigen::internal::aligned_free(ptr); Eigen::internal::aligned_free(ptr);
return newPtr; return newPtr;
} }

View file

@ -37,7 +37,11 @@
#include "src/SVD/JacobiSVD.h" #include "src/SVD/JacobiSVD.h"
#include "src/SVD/BDCSVD.h" #include "src/SVD/BDCSVD.h"
#if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT)
#ifdef EIGEN_USE_MKL
#include "mkl_lapacke.h"
#else
#include "src/misc/lapacke.h" #include "src/misc/lapacke.h"
#endif
#include "src/SVD/JacobiSVD_LAPACKE.h" #include "src/SVD/JacobiSVD_LAPACKE.h"
#endif #endif

View file

@ -248,7 +248,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
/** \brief Reports whether previous computation was successful. /** \brief Reports whether previous computation was successful.
* *
* \returns \c Success if computation was succesful, * \returns \c Success if computation was succesful,
* \c NumericalIssue if the matrix.appears to be negative. * \c NumericalIssue if the factorization failed because of a zero pivot.
*/ */
ComputationInfo info() const ComputationInfo info() const
{ {
@ -376,6 +376,8 @@ template<> struct ldlt_inplace<Lower>
if((rs>0) && pivot_is_valid) if((rs>0) && pivot_is_valid)
A21 /= realAkk; A21 /= realAkk;
else if(rs>0)
ret = ret && (A21.array()==Scalar(0)).all();
if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed
else if(!pivot_is_valid) found_zero_pivot = true; else if(!pivot_is_valid) found_zero_pivot = true;
@ -568,13 +570,14 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons
// more precisely, use pseudo-inverse of D (see bug 241) // more precisely, use pseudo-inverse of D (see bug 241)
using std::abs; using std::abs;
const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD()); const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD());
// In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon // In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min())
// as motivated by LAPACK's xGELSS: // and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS:
// RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest()); // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest());
// However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest
// diagonal element is not well justified and leads to numerical issues in some cases. // diagonal element is not well justified and leads to numerical issues in some cases.
// Moreover, Lapack's xSYTRS routines use 0 for the tolerance. // Moreover, Lapack's xSYTRS routines use 0 for the tolerance.
RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest(); // Using numeric_limits::min() gives us more robustness to denormals.
RealScalar tolerance = (std::numeric_limits<RealScalar>::min)();
for (Index i = 0; i < vecD.size(); ++i) for (Index i = 0; i < vecD.size(); ++i)
{ {

View file

@ -41,14 +41,18 @@ template<typename MatrixType, int UpLo> struct LLT_Traits;
* Example: \include LLT_example.cpp * Example: \include LLT_example.cpp
* Output: \verbinclude LLT_example.out * Output: \verbinclude LLT_example.out
* *
* \b Performance: for best performance, it is recommended to use a column-major storage format
* with the Lower triangular part (the default), or, equivalently, a row-major storage format
* with the Upper triangular part. Otherwise, you might get a 20% slowdown for the full factorization
* step, and rank-updates can be up to 3 times slower.
*
* This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
* *
* Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered.
* Therefore, the strict lower part does not have to store correct values.
*
* \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT
*/ */
/* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH)
* Note that during the decomposition, only the upper triangular part of A is considered. Therefore,
* the strict lower part does not have to store correct values.
*/
template<typename _MatrixType, int _UpLo> class LLT template<typename _MatrixType, int _UpLo> class LLT
{ {
public: public:
@ -146,7 +150,7 @@ template<typename _MatrixType, int _UpLo> class LLT
} }
template<typename Derived> template<typename Derived>
void solveInPlace(MatrixBase<Derived> &bAndX) const; void solveInPlace(const MatrixBase<Derived> &bAndX) const;
template<typename InputType> template<typename InputType>
LLT& compute(const EigenBase<InputType>& matrix); LLT& compute(const EigenBase<InputType>& matrix);
@ -177,7 +181,7 @@ template<typename _MatrixType, int _UpLo> class LLT
/** \brief Reports whether previous computation was successful. /** \brief Reports whether previous computation was successful.
* *
* \returns \c Success if computation was succesful, * \returns \c Success if computation was succesful,
* \c NumericalIssue if the matrix.appears to be negative. * \c NumericalIssue if the matrix.appears not to be positive definite.
*/ */
ComputationInfo info() const ComputationInfo info() const
{ {
@ -425,6 +429,7 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType>
eigen_assert(a.rows()==a.cols()); eigen_assert(a.rows()==a.cols());
const Index size = a.rows(); const Index size = a.rows();
m_matrix.resize(size, size); m_matrix.resize(size, size);
if (!internal::is_same_dense(m_matrix, a.derived()))
m_matrix = a.derived(); m_matrix = a.derived();
// Compute matrix L1 norm = max abs column sum. // Compute matrix L1 norm = max abs column sum.
@ -485,11 +490,14 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const
* *
* This version avoids a copy when the right hand side matrix b is not needed anymore. * This version avoids a copy when the right hand side matrix b is not needed anymore.
* *
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
* This function will const_cast it, so constness isn't honored here.
*
* \sa LLT::solve(), MatrixBase::llt() * \sa LLT::solve(), MatrixBase::llt()
*/ */
template<typename MatrixType, int _UpLo> template<typename MatrixType, int _UpLo>
template<typename Derived> template<typename Derived>
void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const void LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const
{ {
eigen_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_isInitialized && "LLT is not initialized.");
eigen_assert(m_matrix.rows()==bAndX.rows()); eigen_assert(m_matrix.rows()==bAndX.rows());

View file

@ -231,10 +231,16 @@ class Array
: Base(other) : Base(other)
{ } { }
private:
struct PrivateType {};
public:
/** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */ /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other) EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other,
typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value,
PrivateType>::type = PrivateType())
: Base(other.derived()) : Base(other.derived())
{ } { }

View file

@ -175,7 +175,7 @@ template<typename Derived> class ArrayBase
*/ */
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other) ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
{ {
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>()); call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
@ -188,7 +188,7 @@ ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
*/ */
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other) ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
{ {
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>()); call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
@ -201,7 +201,7 @@ ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
*/ */
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other) ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
{ {
call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>()); call_assignment(derived(), other.derived(), internal::mul_assign_op<Scalar,typename OtherDerived::Scalar>());
@ -214,7 +214,7 @@ ArrayBase<Derived>::operator*=(const ArrayBase<OtherDerived>& other)
*/ */
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived & EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other) ArrayBase<Derived>::operator/=(const ArrayBase<OtherDerived>& other)
{ {
call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>()); call_assignment(derived(), other.derived(), internal::div_assign_op<Scalar,typename OtherDerived::Scalar>());

View file

@ -32,7 +32,8 @@ struct traits<ArrayWrapper<ExpressionType> >
// Let's remove NestByRefBit // Let's remove NestByRefBit
enum { enum {
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags, Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
Flags = Flags0 & ~NestByRefBit LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
}; };
}; };
} }
@ -129,7 +130,8 @@ struct traits<MatrixWrapper<ExpressionType> >
// Let's remove NestByRefBit // Let's remove NestByRefBit
enum { enum {
Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags, Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags,
Flags = Flags0 & ~NestByRefBit LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0,
Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag
}; };
}; };
} }

View file

@ -39,7 +39,7 @@ public:
enum { enum {
DstAlignment = DstEvaluator::Alignment, DstAlignment = DstEvaluator::Alignment,
SrcAlignment = SrcEvaluator::Alignment, SrcAlignment = SrcEvaluator::Alignment,
DstHasDirectAccess = DstFlags & DirectAccessBit, DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
}; };
@ -83,7 +83,7 @@ private:
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
&& (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)), && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
&& (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll, /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */ so it's only good for large enough sizes. */

View file

@ -84,7 +84,8 @@ class vml_assign_traits
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \ struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \ Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \ typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \ static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
resize_if_allowed(dst, src, func); \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \ if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \ Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \ typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \ const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \ static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
resize_if_allowed(dst, src, func); \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \ VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \ if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \

View file

@ -977,7 +977,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
OuterStrideAtCompileTime = HasSameStorageOrderAsArgType OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
? int(outer_stride_at_compile_time<ArgType>::ret) ? int(outer_stride_at_compile_time<ArgType>::ret)
: int(inner_stride_at_compile_time<ArgType>::ret), : int(inner_stride_at_compile_time<ArgType>::ret),
MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit, FlagsRowMajorBit = XprType::Flags&RowMajorBit,
@ -987,7 +987,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
PacketAlignment = unpacket_traits<PacketScalar>::alignment, PacketAlignment = unpacket_traits<PacketScalar>::alignment,
Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
&& (OuterStrideAtCompileTime!=0)
&& (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0) Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
}; };
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type; typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
@ -1018,14 +1020,16 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
: m_argImpl(block.nestedExpression()), : m_argImpl(block.nestedExpression()),
m_startRow(block.startRow()), m_startRow(block.startRow()),
m_startCol(block.startCol()) m_startCol(block.startCol()),
m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0)
{ } { }
typedef typename XprType::Scalar Scalar; typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;
enum { enum {
RowsAtCompileTime = XprType::RowsAtCompileTime RowsAtCompileTime = XprType::RowsAtCompileTime,
ForwardLinearAccess = InnerPanel && bool(evaluator<ArgType>::Flags&LinearAccessBit)
}; };
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@ -1037,6 +1041,9 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const CoeffReturnType coeff(Index index) const
{ {
if (ForwardLinearAccess)
return m_argImpl.coeff(m_linear_offset.value() + index);
else
return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
} }
@ -1049,6 +1056,9 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index) Scalar& coeffRef(Index index)
{ {
if (ForwardLinearAccess)
return m_argImpl.coeffRef(m_linear_offset.value() + index);
else
return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
} }
@ -1063,6 +1073,9 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_STRONG_INLINE EIGEN_STRONG_INLINE
PacketType packet(Index index) const PacketType packet(Index index) const
{ {
if (ForwardLinearAccess)
return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index);
else
return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0); RowsAtCompileTime == 1 ? index : 0);
} }
@ -1078,6 +1091,9 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_STRONG_INLINE EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x) void writePacket(Index index, const PacketType& x)
{ {
if (ForwardLinearAccess)
return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x);
else
return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0, RowsAtCompileTime == 1 ? index : 0,
x); x);
@ -1087,6 +1103,7 @@ protected:
evaluator<ArgType> m_argImpl; evaluator<ArgType> m_argImpl;
const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow; const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol; const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
const variable_if_dynamic<Index, InnerPanel ? Dynamic : 0> m_linear_offset;
}; };
// TODO: This evaluator does not actually use the child evaluator; // TODO: This evaluator does not actually use the child evaluator;

View file

@ -105,7 +105,7 @@ class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp<NullaryOp
*/ */
template<typename Derived> template<typename Derived>
template<typename CustomNullaryOp> template<typename CustomNullaryOp>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func) DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func)
{ {
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func); return CwiseNullaryOp<CustomNullaryOp, PlainObject>(rows, cols, func);
@ -150,7 +150,7 @@ DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func)
*/ */
template<typename Derived> template<typename Derived>
template<typename CustomNullaryOp> template<typename CustomNullaryOp>
EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject>
DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func) DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func)
{ {
return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func); return CwiseNullaryOp<CustomNullaryOp, PlainObject>(RowsAtCompileTime, ColsAtCompileTime, func);
@ -192,7 +192,7 @@ DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value)
* \sa class CwiseNullaryOp * \sa class CwiseNullaryOp
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Constant(Index size, const Scalar& value) DenseBase<Derived>::Constant(Index size, const Scalar& value)
{ {
return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value)); return DenseBase<Derived>::NullaryExpr(size, internal::scalar_constant_op<Scalar>(value));
@ -208,7 +208,7 @@ DenseBase<Derived>::Constant(Index size, const Scalar& value)
* \sa class CwiseNullaryOp * \sa class CwiseNullaryOp
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Constant(const Scalar& value) DenseBase<Derived>::Constant(const Scalar& value)
{ {
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
@ -220,7 +220,7 @@ DenseBase<Derived>::Constant(const Scalar& value)
* \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&) * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&)
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -232,7 +232,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, Index size, const Scalar& low, const
* \sa LinSpaced(Scalar,Scalar) * \sa LinSpaced(Scalar,Scalar)
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high) DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -264,7 +264,7 @@ DenseBase<Derived>::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig
* \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high) DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -276,7 +276,7 @@ DenseBase<Derived>::LinSpaced(Index size, const Scalar& low, const Scalar& high)
* Special version for fixed size types which does not require the size parameter. * Special version for fixed size types which does not require the size parameter.
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high) DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@ -286,7 +286,7 @@ DenseBase<Derived>::LinSpaced(const Scalar& low, const Scalar& high)
/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ /** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */
template<typename Derived> template<typename Derived>
bool DenseBase<Derived>::isApproxToConstant EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApproxToConstant
(const Scalar& val, const RealScalar& prec) const (const Scalar& val, const RealScalar& prec) const
{ {
typename internal::nested_eval<Derived,1>::type self(derived()); typename internal::nested_eval<Derived,1>::type self(derived());
@ -301,7 +301,7 @@ bool DenseBase<Derived>::isApproxToConstant
* *
* \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */ * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
template<typename Derived> template<typename Derived>
bool DenseBase<Derived>::isConstant EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isConstant
(const Scalar& val, const RealScalar& prec) const (const Scalar& val, const RealScalar& prec) const
{ {
return isApproxToConstant(val, prec); return isApproxToConstant(val, prec);
@ -312,7 +312,7 @@ bool DenseBase<Derived>::isConstant
* \sa setConstant(), Constant(), class CwiseNullaryOp * \sa setConstant(), Constant(), class CwiseNullaryOp
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
{ {
setConstant(val); setConstant(val);
} }
@ -322,7 +322,7 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::fill(const Scalar& val)
* \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes() * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
{ {
return derived() = Constant(rows(), cols(), val); return derived() = Constant(rows(), cols(), val);
} }
@ -337,7 +337,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setConstant(const Scalar& val)
* \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val) PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
{ {
resize(size); resize(size);
@ -356,7 +356,7 @@ PlainObjectBase<Derived>::setConstant(Index size, const Scalar& val)
* \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&)
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val) PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
{ {
resize(rows, cols); resize(rows, cols);
@ -380,7 +380,7 @@ PlainObjectBase<Derived>::setConstant(Index rows, Index cols, const Scalar& val)
* \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize)); return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op<Scalar,PacketScalar>(low,high,newSize));
@ -400,7 +400,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Index newSize, con
* \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low, const Scalar& high)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return setLinSpaced(size(), low, high); return setLinSpaced(size(), low, high);
@ -423,7 +423,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(const Scalar& low,
* \sa Zero(), Zero(Index) * \sa Zero(), Zero(Index)
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Zero(Index rows, Index cols) DenseBase<Derived>::Zero(Index rows, Index cols)
{ {
return Constant(rows, cols, Scalar(0)); return Constant(rows, cols, Scalar(0));
@ -446,7 +446,7 @@ DenseBase<Derived>::Zero(Index rows, Index cols)
* \sa Zero(), Zero(Index,Index) * \sa Zero(), Zero(Index,Index)
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Zero(Index size) DenseBase<Derived>::Zero(Index size)
{ {
return Constant(size, Scalar(0)); return Constant(size, Scalar(0));
@ -463,7 +463,7 @@ DenseBase<Derived>::Zero(Index size)
* \sa Zero(Index), Zero(Index,Index) * \sa Zero(Index), Zero(Index,Index)
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Zero() DenseBase<Derived>::Zero()
{ {
return Constant(Scalar(0)); return Constant(Scalar(0));
@ -478,7 +478,7 @@ DenseBase<Derived>::Zero()
* \sa class CwiseNullaryOp, Zero() * \sa class CwiseNullaryOp, Zero()
*/ */
template<typename Derived> template<typename Derived>
bool DenseBase<Derived>::isZero(const RealScalar& prec) const EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isZero(const RealScalar& prec) const
{ {
typename internal::nested_eval<Derived,1>::type self(derived()); typename internal::nested_eval<Derived,1>::type self(derived());
for(Index j = 0; j < cols(); ++j) for(Index j = 0; j < cols(); ++j)
@ -496,7 +496,7 @@ bool DenseBase<Derived>::isZero(const RealScalar& prec) const
* \sa class CwiseNullaryOp, Zero() * \sa class CwiseNullaryOp, Zero()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero() EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
{ {
return setConstant(Scalar(0)); return setConstant(Scalar(0));
} }
@ -511,7 +511,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setZero()
* \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero() * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setZero(Index newSize) PlainObjectBase<Derived>::setZero(Index newSize)
{ {
resize(newSize); resize(newSize);
@ -529,7 +529,7 @@ PlainObjectBase<Derived>::setZero(Index newSize)
* \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero() * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setZero(Index rows, Index cols) PlainObjectBase<Derived>::setZero(Index rows, Index cols)
{ {
resize(rows, cols); resize(rows, cols);
@ -553,7 +553,7 @@ PlainObjectBase<Derived>::setZero(Index rows, Index cols)
* \sa Ones(), Ones(Index), isOnes(), class Ones * \sa Ones(), Ones(Index), isOnes(), class Ones
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Ones(Index rows, Index cols) DenseBase<Derived>::Ones(Index rows, Index cols)
{ {
return Constant(rows, cols, Scalar(1)); return Constant(rows, cols, Scalar(1));
@ -576,7 +576,7 @@ DenseBase<Derived>::Ones(Index rows, Index cols)
* \sa Ones(), Ones(Index,Index), isOnes(), class Ones * \sa Ones(), Ones(Index,Index), isOnes(), class Ones
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Ones(Index newSize) DenseBase<Derived>::Ones(Index newSize)
{ {
return Constant(newSize, Scalar(1)); return Constant(newSize, Scalar(1));
@ -593,7 +593,7 @@ DenseBase<Derived>::Ones(Index newSize)
* \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType
DenseBase<Derived>::Ones() DenseBase<Derived>::Ones()
{ {
return Constant(Scalar(1)); return Constant(Scalar(1));
@ -608,7 +608,7 @@ DenseBase<Derived>::Ones()
* \sa class CwiseNullaryOp, Ones() * \sa class CwiseNullaryOp, Ones()
*/ */
template<typename Derived> template<typename Derived>
bool DenseBase<Derived>::isOnes EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isOnes
(const RealScalar& prec) const (const RealScalar& prec) const
{ {
return isApproxToConstant(Scalar(1), prec); return isApproxToConstant(Scalar(1), prec);
@ -622,7 +622,7 @@ bool DenseBase<Derived>::isOnes
* \sa class CwiseNullaryOp, Ones() * \sa class CwiseNullaryOp, Ones()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes() EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
{ {
return setConstant(Scalar(1)); return setConstant(Scalar(1));
} }
@ -637,7 +637,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setOnes()
* \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones() * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setOnes(Index newSize) PlainObjectBase<Derived>::setOnes(Index newSize)
{ {
resize(newSize); resize(newSize);
@ -655,7 +655,7 @@ PlainObjectBase<Derived>::setOnes(Index newSize)
* \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones() * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived&
PlainObjectBase<Derived>::setOnes(Index rows, Index cols) PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
{ {
resize(rows, cols); resize(rows, cols);
@ -679,7 +679,7 @@ PlainObjectBase<Derived>::setOnes(Index rows, Index cols)
* \sa Identity(), setIdentity(), isIdentity() * \sa Identity(), setIdentity(), isIdentity()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
MatrixBase<Derived>::Identity(Index rows, Index cols) MatrixBase<Derived>::Identity(Index rows, Index cols)
{ {
return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>()); return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_identity_op<Scalar>());
@ -696,7 +696,7 @@ MatrixBase<Derived>::Identity(Index rows, Index cols)
* \sa Identity(Index,Index), setIdentity(), isIdentity() * \sa Identity(Index,Index), setIdentity(), isIdentity()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::IdentityReturnType
MatrixBase<Derived>::Identity() MatrixBase<Derived>::Identity()
{ {
EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
@ -771,7 +771,7 @@ struct setIdentity_impl<Derived, true>
* \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity() * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity() EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
{ {
return internal::setIdentity_impl<Derived>::run(derived()); return internal::setIdentity_impl<Derived>::run(derived());
} }
@ -787,7 +787,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity()
* \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity() * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index cols)
{ {
derived().resize(rows, cols); derived().resize(rows, cols);
return setIdentity(); return setIdentity();
@ -800,7 +800,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::setIdentity(Index rows, Index
* \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index newSize, Index i)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i); return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i);
@ -815,7 +815,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
* \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::Unit(Index i)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return BasisReturnType(SquareMatrixType::Identity(),i); return BasisReturnType(SquareMatrixType::Identity(),i);
@ -828,7 +828,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX() EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitX()
{ return Derived::Unit(0); } { return Derived::Unit(0); }
/** \returns an expression of the Y axis unit vector (0,1{,0}^*) /** \returns an expression of the Y axis unit vector (0,1{,0}^*)
@ -838,7 +838,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY() EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitY()
{ return Derived::Unit(1); } { return Derived::Unit(1); }
/** \returns an expression of the Z axis unit vector (0,0,1{,0}^*) /** \returns an expression of the Z axis unit vector (0,0,1{,0}^*)
@ -848,7 +848,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ() EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitZ()
{ return Derived::Unit(2); } { return Derived::Unit(2); }
/** \returns an expression of the W axis unit vector (0,0,0,1) /** \returns an expression of the W axis unit vector (0,0,0,1)
@ -858,7 +858,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBa
* \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW()
*/ */
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW() EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::BasisReturnType MatrixBase<Derived>::UnitW()
{ return Derived::Unit(3); } { return Derived::Unit(3); }
} // end namespace Eigen } // end namespace Eigen

View file

@ -296,7 +296,7 @@ template<typename Derived> class DenseBase
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
Derived& operator=(const ReturnByValue<OtherDerived>& func); Derived& operator=(const ReturnByValue<OtherDerived>& func);
/** \ínternal /** \internal
* Copies \a other into *this without evaluating other. \returns a reference to *this. * Copies \a other into *this without evaluating other. \returns a reference to *this.
* \deprecated */ * \deprecated */
template<typename OtherDerived> template<typename OtherDerived>
@ -484,9 +484,9 @@ template<typename Derived> class DenseBase
return derived().coeff(0,0); return derived().coeff(0,0);
} }
bool all() const; EIGEN_DEVICE_FUNC bool all() const;
bool any() const; EIGEN_DEVICE_FUNC bool any() const;
Index count() const; EIGEN_DEVICE_FUNC Index count() const;
typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType; typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType; typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;

View file

@ -70,7 +70,10 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index)
{
eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() );
}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)

View file

@ -31,7 +31,8 @@ struct dot_nocheck
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod; typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
typedef typename conj_prod::result_type ResScalar; typedef typename conj_prod::result_type ResScalar;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) EIGEN_STRONG_INLINE
static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{ {
return a.template binaryExpr<conj_prod>(b).sum(); return a.template binaryExpr<conj_prod>(b).sum();
} }
@ -43,7 +44,8 @@ struct dot_nocheck<T, U, true>
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod; typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
typedef typename conj_prod::result_type ResScalar; typedef typename conj_prod::result_type ResScalar;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) EIGEN_STRONG_INLINE
static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{ {
return a.transpose().template binaryExpr<conj_prod>(b).sum(); return a.transpose().template binaryExpr<conj_prod>(b).sum();
} }
@ -65,6 +67,7 @@ struct dot_nocheck<T, U, true>
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
{ {
@ -102,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
* \sa lpNorm(), dot(), squaredNorm() * \sa lpNorm(), dot(), squaredNorm()
*/ */
template<typename Derived> template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{ {
return numext::sqrt(squaredNorm()); return numext::sqrt(squaredNorm());
} }
@ -117,7 +120,7 @@ inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real Matr
* \sa norm(), normalize() * \sa norm(), normalize()
*/ */
template<typename Derived> template<typename Derived>
inline const typename MatrixBase<Derived>::PlainObject EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::normalized() const MatrixBase<Derived>::normalized() const
{ {
typedef typename internal::nested_eval<Derived,2>::type _Nested; typedef typename internal::nested_eval<Derived,2>::type _Nested;
@ -139,7 +142,7 @@ MatrixBase<Derived>::normalized() const
* \sa norm(), normalized() * \sa norm(), normalized()
*/ */
template<typename Derived> template<typename Derived>
inline void MatrixBase<Derived>::normalize() EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()
{ {
RealScalar z = squaredNorm(); RealScalar z = squaredNorm();
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
@ -160,7 +163,7 @@ inline void MatrixBase<Derived>::normalize()
* \sa stableNorm(), stableNormalize(), normalized() * \sa stableNorm(), stableNormalize(), normalized()
*/ */
template<typename Derived> template<typename Derived>
inline const typename MatrixBase<Derived>::PlainObject EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::stableNormalized() const MatrixBase<Derived>::stableNormalized() const
{ {
typedef typename internal::nested_eval<Derived,3>::type _Nested; typedef typename internal::nested_eval<Derived,3>::type _Nested;
@ -185,7 +188,7 @@ MatrixBase<Derived>::stableNormalized() const
* \sa stableNorm(), stableNormalized(), normalize() * \sa stableNorm(), stableNormalized(), normalize()
*/ */
template<typename Derived> template<typename Derived>
inline void MatrixBase<Derived>::stableNormalize() EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize()
{ {
RealScalar w = cwiseAbs().maxCoeff(); RealScalar w = cwiseAbs().maxCoeff();
RealScalar z = (derived()/w).squaredNorm(); RealScalar z = (derived()/w).squaredNorm();

View file

@ -14,6 +14,7 @@
namespace Eigen { namespace Eigen {
/** \class EigenBase /** \class EigenBase
* \ingroup Core_Module
* *
* Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
* *
@ -128,6 +129,7 @@ template<typename Derived> struct EigenBase
*/ */
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other) Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
{ {
call_assignment(derived(), other.derived()); call_assignment(derived(), other.derived());
@ -136,6 +138,7 @@ Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other) Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
{ {
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>()); call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
@ -144,6 +147,7 @@ Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other) Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
{ {
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>()); call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());

View file

@ -24,9 +24,14 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category template<int Size, int MaxSize> struct product_size_category
{ {
enum { is_large = MaxSize == Dynamic || enum {
#ifndef EIGEN_CUDA_ARCH
is_large = MaxSize == Dynamic ||
Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
(Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
#else
is_large = 0,
#endif
value = is_large ? Large value = is_large ? Large
: Size == 1 ? 1 : Size == 1 ? 1
: Small : Small
@ -379,8 +384,6 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
* *
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*() * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
*/ */
#ifndef __CUDACC__
template<typename Derived> template<typename Derived>
template<typename OtherDerived> template<typename OtherDerived>
inline const Product<Derived, OtherDerived> inline const Product<Derived, OtherDerived>
@ -412,8 +415,6 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
return Product<Derived, OtherDerived>(derived(), other.derived()); return Product<Derived, OtherDerived>(derived(), other.derived());
} }
#endif // __CUDACC__
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
* *
* The returned product will behave like any other expressions: the coefficients of the product will be * The returned product will behave like any other expressions: the coefficients of the product will be

View file

@ -230,7 +230,7 @@ pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(
* duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]} * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
* Currently, this function is only used for scalar * complex products. * Currently, this function is only used for scalar * complex products.
*/ */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; } ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
/** \internal \returns a packet with elements of \a *from quadrupled. /** \internal \returns a packet with elements of \a *from quadrupled.
@ -278,7 +278,7 @@ inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
} }
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */ /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
template<typename Packet> inline Packet template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
plset(const typename unpacket_traits<Packet>::type& a) { return a; } plset(const typename unpacket_traits<Packet>::type& a) { return a; }
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
@ -482,7 +482,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& fro
* by the current computation. * by the current computation.
*/ */
template<typename Packet, int LoadMode> template<typename Packet, int LoadMode>
inline Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
{ {
return ploadt<Packet, LoadMode>(from); return ploadt<Packet, LoadMode>(from);
} }

View file

@ -20,11 +20,17 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
{ {
typedef traits<PlainObjectType> TraitsBase; typedef traits<PlainObjectType> TraitsBase;
enum { enum {
PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags&RowMajorBit)==RowMajorBit)
? PlainObjectType::ColsAtCompileTime
: PlainObjectType::RowsAtCompileTime,
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
? int(PlainObjectType::InnerStrideAtCompileTime) ? int(PlainObjectType::InnerStrideAtCompileTime)
: int(StrideType::InnerStrideAtCompileTime), : int(StrideType::InnerStrideAtCompileTime),
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime) ? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic
? Dynamic
: int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize))
: int(StrideType::OuterStrideAtCompileTime), : int(StrideType::OuterStrideAtCompileTime),
Alignment = int(MapOptions)&int(AlignedMask), Alignment = int(MapOptions)&int(AlignedMask),
Flags0 = TraitsBase::Flags & (~NestByRefBit), Flags0 = TraitsBase::Flags & (~NestByRefBit),
@ -107,10 +113,11 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline Index outerStride() const inline Index outerStride() const
{ {
return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() return int(StrideType::OuterStrideAtCompileTime) != 0 ? m_stride.outer()
: IsVectorAtCompileTime ? this->size() : int(internal::traits<Map>::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits<Map>::OuterStrideAtCompileTime)
: int(Flags)&RowMajorBit ? this->cols() : IsVectorAtCompileTime ? (this->size() * innerStride())
: this->rows(); : (int(Flags)&RowMajorBit) ? (this->cols() * innerStride())
: (this->rows() * innerStride());
} }
/** Constructor in the fixed-size case. /** Constructor in the fixed-size case.

View file

@ -348,31 +348,7 @@ struct norm1_retval
* Implementation of hypot * * Implementation of hypot *
****************************************************************************/ ****************************************************************************/
template<typename Scalar> template<typename Scalar> struct hypot_impl;
struct hypot_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x, const Scalar& y)
{
EIGEN_USING_STD_MATH(abs);
EIGEN_USING_STD_MATH(sqrt);
RealScalar _x = abs(x);
RealScalar _y = abs(y);
Scalar p, qp;
if(_x>_y)
{
p = _x;
qp = _y / p;
}
else
{
p = _y;
qp = _x / p;
}
if(p==RealScalar(0)) return RealScalar(0);
return p * sqrt(RealScalar(1) + qp*qp);
}
};
template<typename Scalar> template<typename Scalar>
struct hypot_retval struct hypot_retval
@ -495,7 +471,7 @@ namespace std_fallback {
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_USING_STD_MATH(log); EIGEN_USING_STD_MATH(log);
Scalar x1p = RealScalar(1) + x; Scalar x1p = RealScalar(1) + x;
return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); return numext::equal_strict(x1p, Scalar(1)) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
} }
} }
@ -1061,11 +1037,24 @@ double log(const double &x) { return ::log(x); }
template<typename T> template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
typename NumTraits<T>::Real abs(const T &x) { typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type
abs(const T &x) {
EIGEN_USING_STD_MATH(abs); EIGEN_USING_STD_MATH(abs);
return abs(x); return abs(x);
} }
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type
abs(const T &x) {
return x;
}
#if defined(__SYCL_DEVICE_ONLY__)
EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); }
EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); }
#endif // defined(__SYCL_DEVICE_ONLY__)
#ifdef __CUDACC__ #ifdef __CUDACC__
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float abs(const float &x) { return ::fabsf(x); } float abs(const float &x) { return ::fabsf(x); }

View file

@ -71,6 +71,29 @@ T generic_fast_tanh_float(const T& a_x)
return pdiv(p, q); return pdiv(p, q);
} }
template<typename RealScalar>
EIGEN_STRONG_INLINE
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
{
EIGEN_USING_STD_MATH(sqrt);
RealScalar p, qp;
p = numext::maxi(x,y);
if(p==RealScalar(0)) return RealScalar(0);
qp = numext::mini(y,x) / p;
return p * sqrt(RealScalar(1) + qp*qp);
}
template<typename Scalar>
struct hypot_impl
{
typedef typename NumTraits<Scalar>::Real RealScalar;
static inline RealScalar run(const Scalar& x, const Scalar& y)
{
EIGEN_USING_STD_MATH(abs);
return positive_real_hypot<RealScalar>(abs(x), abs(y));
}
};
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen

View file

@ -160,20 +160,11 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator-=(const MatrixBase<OtherDerived>& other); Derived& operator-=(const MatrixBase<OtherDerived>& other);
#ifdef __CUDACC__
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
const Product<Derived,OtherDerived,LazyProduct>
operator*(const MatrixBase<OtherDerived> &other) const
{ return this->lazyProduct(other); }
#else
template<typename OtherDerived>
const Product<Derived,OtherDerived> const Product<Derived,OtherDerived>
operator*(const MatrixBase<OtherDerived> &other) const; operator*(const MatrixBase<OtherDerived> &other) const;
#endif
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
const Product<Derived,OtherDerived,LazyProduct> const Product<Derived,OtherDerived,LazyProduct>
@ -294,7 +285,7 @@ template<typename Derived> class MatrixBase
* fuzzy comparison such as isApprox() * fuzzy comparison such as isApprox()
* \sa isApprox(), operator!= */ * \sa isApprox(), operator!= */
template<typename OtherDerived> template<typename OtherDerived>
inline bool operator==(const MatrixBase<OtherDerived>& other) const EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase<OtherDerived>& other) const
{ return cwiseEqual(other).all(); } { return cwiseEqual(other).all(); }
/** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other. /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other.
@ -302,7 +293,7 @@ template<typename Derived> class MatrixBase
* fuzzy comparison such as isApprox() * fuzzy comparison such as isApprox()
* \sa isApprox(), operator== */ * \sa isApprox(), operator== */
template<typename OtherDerived> template<typename OtherDerived>
inline bool operator!=(const MatrixBase<OtherDerived>& other) const EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase<OtherDerived>& other) const
{ return cwiseNotEqual(other).any(); } { return cwiseNotEqual(other).any(); }
NoAlias<Derived,Eigen::MatrixBase > noalias(); NoAlias<Derived,Eigen::MatrixBase > noalias();

View file

@ -215,6 +215,8 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); } static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); } static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
static inline int digits10() { return NumTraits<Scalar>::digits10(); }
}; };
template<> struct NumTraits<std::string> template<> struct NumTraits<std::string>

View file

@ -577,6 +577,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
* \a data pointers. * \a data pointers.
* *
* Here is an example using strides:
* \include Matrix_Map_stride.cpp
* Output: \verbinclude Matrix_Map_stride.out
*
* \see class Map * \see class Map
*/ */
//@{ //@{

View file

@ -97,8 +97,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions"); && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
} }
EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
@ -127,7 +127,7 @@ public:
using Base::derived; using Base::derived;
typedef typename Base::Scalar Scalar; typedef typename Base::Scalar Scalar;
operator const Scalar() const EIGEN_STRONG_INLINE operator const Scalar() const
{ {
return internal::evaluator<ProductXpr>(derived()).coeff(0,0); return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
} }
@ -162,7 +162,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
public: public:
EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const
{ {
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
@ -170,7 +170,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
return internal::evaluator<Derived>(derived()).coeff(row,col); return internal::evaluator<Derived>(derived()).coeff(row,col);
} }
EIGEN_DEVICE_FUNC Scalar coeff(Index i) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const
{ {
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );

View file

@ -32,7 +32,7 @@ struct evaluator<Product<Lhs, Rhs, Options> >
typedef Product<Lhs, Rhs, Options> XprType; typedef Product<Lhs, Rhs, Options> XprType;
typedef product_evaluator<XprType> Base; typedef product_evaluator<XprType> Base;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
}; };
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
@ -55,7 +55,7 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
const Product<Lhs, Rhs, DefaultProduct> > XprType; const Product<Lhs, Rhs, DefaultProduct> > XprType;
typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base; typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
: Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
{} {}
}; };
@ -68,7 +68,7 @@ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType; typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base; typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
: Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>( : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
xpr.index() )) xpr.index() ))
@ -207,6 +207,12 @@ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename
static const bool value = true; static const bool value = true;
}; };
template<typename OtherXpr, typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
static const bool value = true;
};
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2> template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
struct assignment_from_xpr_op_product struct assignment_from_xpr_op_product
{ {
@ -240,19 +246,19 @@ template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct> struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
{ {
template<typename Dst> template<typename Dst>
static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
} }
template<typename Dst> template<typename Dst>
static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
} }
template<typename Dst> template<typename Dst>
static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
}; };
@ -306,25 +312,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
}; };
template<typename Dst> template<typename Dst>
static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>()); internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
} }
template<typename Dst> template<typename Dst>
static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>()); internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
} }
template<typename Dst> template<typename Dst>
static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>()); internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
} }
template<typename Dst> template<typename Dst>
static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{ {
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>()); internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
} }
@ -779,7 +785,11 @@ public:
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0), Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
Alignment = evaluator<MatrixType>::Alignment Alignment = evaluator<MatrixType>::Alignment,
AsScalarProduct = (DiagonalType::SizeAtCompileTime==1)
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
|| (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
}; };
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
@ -791,6 +801,9 @@ public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
{ {
if(AsScalarProduct)
return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
else
return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
} }

View file

@ -407,7 +407,7 @@ protected:
*/ */
template<typename Derived> template<typename Derived>
template<typename Func> template<typename Func>
typename internal::traits<Derived>::Scalar EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const DenseBase<Derived>::redux(const Func& func) const
{ {
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");

View file

@ -95,6 +95,8 @@ protected:
template<typename Expression> template<typename Expression>
EIGEN_DEVICE_FUNC void construct(Expression& expr) EIGEN_DEVICE_FUNC void construct(Expression& expr)
{ {
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression);
if(PlainObjectType::RowsAtCompileTime==1) if(PlainObjectType::RowsAtCompileTime==1)
{ {
eigen_assert(expr.rows()==1 || expr.cols()==1); eigen_assert(expr.rows()==1 || expr.cols()==1);

View file

@ -71,7 +71,9 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
{} {
EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);
}
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline Index rows() const { return m_matrix.rows(); } inline Index rows() const { return m_matrix.rows(); }
@ -189,7 +191,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2); TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
} }
typedef SelfAdjointView<const MatrixConjugateReturnType,Mode> ConjugateReturnType; typedef SelfAdjointView<const MatrixConjugateReturnType,UpLo> ConjugateReturnType;
/** \sa MatrixBase::conjugate() const */ /** \sa MatrixBase::conjugate() const */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const ConjugateReturnType conjugate() const inline const ConjugateReturnType conjugate() const

View file

@ -15,33 +15,29 @@ namespace Eigen {
// TODO generalize the scalar type of 'other' // TODO generalize the scalar type of 'other'
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{ {
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>()); internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
return derived(); return derived();
} }
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
{ {
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>()); internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
return derived(); return derived();
} }
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
{ {
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>()); internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
return derived(); return derived();
} }
template<typename Derived> template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{ {
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>()); internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
return derived(); return derived();
} }

View file

@ -34,12 +34,12 @@ template<typename Decomposition, typename RhsType,typename StorageKind> struct s
template<typename Decomposition, typename RhsType> template<typename Decomposition, typename RhsType>
struct solve_traits<Decomposition,RhsType,Dense> struct solve_traits<Decomposition,RhsType,Dense>
{ {
typedef Matrix<typename RhsType::Scalar, typedef typename make_proper_matrix_type<typename RhsType::Scalar,
Decomposition::ColsAtCompileTime, Decomposition::ColsAtCompileTime,
RhsType::ColsAtCompileTime, RhsType::ColsAtCompileTime,
RhsType::PlainObject::Options, RhsType::PlainObject::Options,
Decomposition::MaxColsAtCompileTime, Decomposition::MaxColsAtCompileTime,
RhsType::MaxColsAtCompileTime> PlainObject; RhsType::MaxColsAtCompileTime>::type PlainObject;
}; };
template<typename Decomposition, typename RhsType> template<typename Decomposition, typename RhsType>

View file

@ -165,12 +165,13 @@ MatrixBase<Derived>::stableNorm() const
typedef typename internal::nested_eval<Derived,2>::type DerivedCopy; typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean; typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
DerivedCopy copy(derived()); const DerivedCopy copy(derived());
enum { enum {
CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit) CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit)
|| (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough || (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT) // ifwe cannot allocate on the stack, then let's not bother about this optimization ) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)
&& (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization
}; };
typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>, typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper; typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;

View file

@ -384,7 +384,7 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
const Product<OtherDerived, Transpose, AliasFreeProduct> const Product<OtherDerived, Transpose, AliasFreeProduct>
operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt) operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
{ {
return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt.derived()); return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt);
} }
/** \returns the \a matrix with the inverse transpositions applied to the rows. /** \returns the \a matrix with the inverse transpositions applied to the rows.

View file

@ -204,23 +204,7 @@ template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
} }
}; };
template<> struct conj_helper<Packet8f, Packet4cf, false,false> EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
{ return Packet4cf(Eigen::internal::pmul(x, y.v)); }
};
template<> struct conj_helper<Packet4cf, Packet8f, false,false>
{
EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
{ return Packet4cf(Eigen::internal::pmul(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b) template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{ {
@ -400,23 +384,7 @@ template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
} }
}; };
template<> struct conj_helper<Packet4d, Packet2cd, false,false> EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
{ return Packet2cd(Eigen::internal::pmul(x, y.v)); }
};
template<> struct conj_helper<Packet2cd, Packet4d, false,false>
{
EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
{ return Packet2cd(Eigen::internal::pmul(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b) template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{ {

View file

@ -308,9 +308,9 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
} }
#ifndef EIGEN_VECTORIZE_AVX512 #ifndef EIGEN_VECTORIZE_AVX512
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
#endif #endif
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) { template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
@ -333,9 +333,12 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
{ {
__m256d tmp = _mm256_shuffle_pd(a,a,5); __m256d tmp = _mm256_shuffle_pd(a,a,5);
return _mm256_permute2f128_pd(tmp, tmp, 1); return _mm256_permute2f128_pd(tmp, tmp, 1);
#if 0
// This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd
// exhibit the same latency/throughput, but it is here for future reference/benchmarking...
__m256d swap_halves = _mm256_permute2f128_pd(a,a,1); __m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
return _mm256_permute_pd(swap_halves,5); return _mm256_permute_pd(swap_halves,5);
#endif
} }
// pabs should be ok // pabs should be ok

View file

@ -88,9 +88,9 @@ plog<Packet16f>(const Packet16f& _x) {
// x = x + x - 1.0; // x = x + x - 1.0;
// } else { x = x - 1.0; } // } else { x = x - 1.0; }
__mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ); __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps()); Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
x = psub(x, p16f_1); x = psub(x, p16f_1);
e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps())); e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
x = padd(x, tmp); x = padd(x, tmp);
Packet16f x2 = pmul(x, x); Packet16f x2 = pmul(x, x);
@ -119,8 +119,9 @@ plog<Packet16f>(const Packet16f& _x) {
x = padd(x, y2); x = padd(x, y2);
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF. // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf, return _mm512_mask_blend_ps(iszero_mask,
_mm512_mask_blend_ps(invalid_mask, p16f_nan, x)); _mm512_mask_blend_ps(invalid_mask, x, p16f_nan),
p16f_minus_inf);
} }
#endif #endif
@ -266,8 +267,7 @@ psqrt<Packet16f>(const Packet16f& _x) {
// select only the inverse sqrt of positive normal inputs (denormals are // select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well). // flushed to zero and cause infs as well).
__mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ); __mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x), Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x));
_mm512_setzero_ps());
// Do a single step of Newton's iteration. // Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five)); x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
@ -289,8 +289,7 @@ psqrt<Packet8d>(const Packet8d& _x) {
// select only the inverse sqrt of positive normal inputs (denormals are // select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well). // flushed to zero and cause infs as well).
__mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ); __mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x), Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x));
_mm512_setzero_pd());
// Do a first step of Newton's iteration. // Do a first step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
@ -333,20 +332,18 @@ prsqrt<Packet16f>(const Packet16f& _x) {
// select only the inverse sqrt of positive normal inputs (denormals are // select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well). // flushed to zero and cause infs as well).
__mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ); __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps());
_mm512_rsqrt14_ps(_x));
// Fill in NaNs and Infs for the negative/zero entries. // Fill in NaNs and Infs for the negative/zero entries.
__mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ); __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
Packet16f infs_and_nans = _mm512_mask_blend_ps( Packet16f infs_and_nans = _mm512_mask_blend_ps(
neg_mask, p16f_nan, neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan);
_mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps()));
// Do a single step of Newton's iteration. // Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five)); x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
// Insert NaNs and Infs in all the right places. // Insert NaNs and Infs in all the right places.
return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x); return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans);
} }
template <> template <>
@ -363,14 +360,12 @@ prsqrt<Packet8d>(const Packet8d& _x) {
// select only the inverse sqrt of positive normal inputs (denormals are // select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well). // flushed to zero and cause infs as well).
__mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ); __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd());
_mm512_rsqrt14_pd(_x));
// Fill in NaNs and Infs for the negative/zero entries. // Fill in NaNs and Infs for the negative/zero entries.
__mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ); __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
Packet8d infs_and_nans = _mm512_mask_blend_pd( Packet8d infs_and_nans = _mm512_mask_blend_pd(
neg_mask, p8d_nan, neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan);
_mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd()));
// Do a first step of Newton's iteration. // Do a first step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
@ -379,9 +374,9 @@ prsqrt<Packet8d>(const Packet8d& _x) {
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
// Insert NaNs and Infs in all the right places. // Insert NaNs and Infs in all the right places.
return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x); return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans);
} }
#else #elif defined(EIGEN_VECTORIZE_AVX512ER)
template <> template <>
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) { EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
return _mm512_rsqrt28_ps(x); return _mm512_rsqrt28_ps(x);

View file

@ -618,9 +618,9 @@ EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
pstore(to, pa); pstore(to, pa);
} }
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template <> template <>
EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) { EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {

View file

@ -224,23 +224,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
} }
}; };
template<> struct conj_helper<Packet4f, Packet2cf, false,false> EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
{ return Packet2cf(internal::pmul<Packet4f>(x, y.v)); }
};
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
{ return Packet2cf(internal::pmul<Packet4f>(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{ {
@ -416,23 +400,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
return pconj(internal::pmul(a, b)); return pconj(internal::pmul(a, b));
} }
}; };
template<> struct conj_helper<Packet2d, Packet1cd, false,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
{ return Packet1cd(internal::pmul<Packet2d>(x, y.v)); }
};
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
{ return Packet1cd(internal::pmul<Packet2d>(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{ {

View file

@ -388,10 +388,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); } template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; } template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
{
#ifdef __VSX__
Packet4f ret;
__asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
#else
return vec_min(a, b);
#endif
}
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
{
#ifdef __VSX__
Packet4f ret;
__asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
#else
return vec_max(a, b);
#endif
}
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
@ -910,9 +928,19 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const
// for some weird raisons, it has to be overloaded for packet of integers // for some weird raisons, it has to be overloaded for packet of integers
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); } template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); } template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b)
{
Packet2d ret;
__asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
}
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b)
{
Packet2d ret;
__asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
return ret;
}
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); } template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
@ -1022,7 +1050,7 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) { template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
Packet2l select = { ifPacket.select[0], ifPacket.select[1] }; Packet2l select = { ifPacket.select[0], ifPacket.select[1] };
Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)); Packet2bl mask = reinterpret_cast<Packet2bl>( vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)) );
return vec_sel(elsePacket, thenPacket, mask); return vec_sel(elsePacket, thenPacket, mask);
} }
#endif // __VSX__ #endif // __VSX__

View file

@ -13,7 +13,7 @@
// Redistribution and use in source and binary forms, with or without // Redistribution and use in source and binary forms, with or without
// modification, are permitted. // modification, are permitted.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
@ -147,55 +147,55 @@ namespace half_impl {
// versions to get the ALU speed increased), but you do save the // versions to get the ALU speed increased), but you do save the
// conversion steps back and forth. // conversion steps back and forth.
__device__ half operator + (const half& a, const half& b) { EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
return __hadd(a, b); return __hadd(a, b);
} }
__device__ half operator * (const half& a, const half& b) { EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
return __hmul(a, b); return __hmul(a, b);
} }
__device__ half operator - (const half& a, const half& b) { EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
return __hsub(a, b); return __hsub(a, b);
} }
__device__ half operator / (const half& a, const half& b) { EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
float num = __half2float(a); float num = __half2float(a);
float denom = __half2float(b); float denom = __half2float(b);
return __float2half(num / denom); return __float2half(num / denom);
} }
__device__ half operator - (const half& a) { EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
return __hneg(a); return __hneg(a);
} }
__device__ half& operator += (half& a, const half& b) { EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
a = a + b; a = a + b;
return a; return a;
} }
__device__ half& operator *= (half& a, const half& b) { EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
a = a * b; a = a * b;
return a; return a;
} }
__device__ half& operator -= (half& a, const half& b) { EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
a = a - b; a = a - b;
return a; return a;
} }
__device__ half& operator /= (half& a, const half& b) { EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
a = a / b; a = a / b;
return a; return a;
} }
__device__ bool operator == (const half& a, const half& b) { EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
return __heq(a, b); return __heq(a, b);
} }
__device__ bool operator != (const half& a, const half& b) { EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
return __hne(a, b); return __hne(a, b);
} }
__device__ bool operator < (const half& a, const half& b) { EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
return __hlt(a, b); return __hlt(a, b);
} }
__device__ bool operator <= (const half& a, const half& b) { EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
return __hle(a, b); return __hle(a, b);
} }
__device__ bool operator > (const half& a, const half& b) { EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
return __hgt(a, b); return __hgt(a, b);
} }
__device__ bool operator >= (const half& a, const half& b) { EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
return __hge(a, b); return __hge(a, b);
} }
@ -238,10 +238,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b)
return a; return a;
} }
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
return float(a) == float(b); return numext::equal_strict(float(a),float(b));
} }
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
return float(a) != float(b); return numext::not_equal_strict(float(a), float(b));
} }
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
return float(a) < float(b); return float(a) < float(b);
@ -386,11 +386,15 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
return result; return result;
} }
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
return half(hexp(a));
#else
return half(::expf(float(a))); return half(::expf(float(a)));
#endif
} }
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
return Eigen::half(::hlog(a)); return half(::hlog(a));
#else #else
return half(::logf(float(a))); return half(::logf(float(a)));
#endif #endif
@ -402,7 +406,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
return half(::log10f(float(a))); return half(::log10f(float(a)));
} }
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
return half(hsqrt(a));
#else
return half(::sqrtf(float(a))); return half(::sqrtf(float(a)));
#endif
} }
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
return half(::powf(float(a), float(b))); return half(::powf(float(a), float(b)));
@ -420,10 +428,18 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
return half(::tanhf(float(a))); return half(::tanhf(float(a)));
} }
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
return half(hfloor(a));
#else
return half(::floorf(float(a))); return half(::floorf(float(a)));
#endif
} }
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
return half(hceil(a));
#else
return half(::ceilf(float(a))); return half(::ceilf(float(a)));
#endif
} }
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
@ -474,9 +490,59 @@ template<> struct is_arithmetic<half> { enum { value = true }; };
} // end namespace internal } // end namespace internal
} // end namespace Eigen
namespace std {
template<>
struct numeric_limits<Eigen::half> {
static const bool is_specialized = true;
static const bool is_signed = true;
static const bool is_integer = false;
static const bool is_exact = false;
static const bool has_infinity = true;
static const bool has_quiet_NaN = true;
static const bool has_signaling_NaN = true;
static const float_denorm_style has_denorm = denorm_present;
static const bool has_denorm_loss = false;
static const std::float_round_style round_style = std::round_to_nearest;
static const bool is_iec559 = false;
static const bool is_bounded = false;
static const bool is_modulo = false;
static const int digits = 11;
static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
static const int radix = 2;
static const int min_exponent = -13;
static const int min_exponent10 = -4;
static const int max_exponent = 16;
static const int max_exponent10 = 4;
static const bool traps = true;
static const bool tinyness_before = false;
static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); }
static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); }
static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); }
static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); }
static Eigen::half round_error() { return Eigen::half(0.5); }
static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); }
static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); }
static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); }
};
}
namespace Eigen {
template<> struct NumTraits<Eigen::half> template<> struct NumTraits<Eigen::half>
: GenericNumTraits<Eigen::half> : GenericNumTraits<Eigen::half>
{ {
enum {
IsSigned = true,
IsInteger = false,
IsComplex = false,
RequireInitialization = false
};
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() {
return half_impl::raw_uint16_to_half(0x0800); return half_impl::raw_uint16_to_half(0x0800);
} }
@ -507,7 +573,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
return Eigen::half(::expf(float(a))); return Eigen::half(::expf(float(a)));
} }
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 #if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
return Eigen::half(::hlog(a)); return Eigen::half(::hlog(a));
#else #else
return Eigen::half(::logf(float(a))); return Eigen::half(::logf(float(a)));

View file

@ -291,7 +291,7 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
EIGEN_DEVICE_FUNC inline void EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<float4,4>& kernel) { ptranspose(PacketBlock<float4,4>& kernel) {
double tmp = kernel.packet[0].y; float tmp = kernel.packet[0].y;
kernel.packet[0].y = kernel.packet[1].x; kernel.packet[0].y = kernel.packet[1].x;
kernel.packet[1].x = tmp; kernel.packet[1].x = tmp;

View file

@ -275,7 +275,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
return __floats2half2_rn(r1, r2); return __floats2half2_rn(r1, r2);
} }
#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 #if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
template<> __device__ EIGEN_STRONG_INLINE template<> __device__ EIGEN_STRONG_INLINE
half2 plog<half2>(const half2& a) { half2 plog<half2>(const half2& a) {

View file

@ -0,0 +1,29 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_ARCH_CONJ_HELPER_H
#define EIGEN_ARCH_CONJ_HELPER_H
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL) \
template<> struct conj_helper<PACKET_REAL, PACKET_CPLX, false,false> { \
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_REAL& x, const PACKET_CPLX& y, const PACKET_CPLX& c) const \
{ return padd(c, pmul(x,y)); } \
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_REAL& x, const PACKET_CPLX& y) const \
{ return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x, y.v)); } \
}; \
\
template<> struct conj_helper<PACKET_CPLX, PACKET_REAL, false,false> { \
EIGEN_STRONG_INLINE PACKET_CPLX pmadd(const PACKET_CPLX& x, const PACKET_REAL& y, const PACKET_CPLX& c) const \
{ return padd(c, pmul(x,y)); } \
EIGEN_STRONG_INLINE PACKET_CPLX pmul(const PACKET_CPLX& x, const PACKET_REAL& y) const \
{ return PACKET_CPLX(Eigen::internal::pmul<PACKET_REAL>(x.v, y)); } \
};
#endif // EIGEN_ARCH_CONJ_HELPER_H

View file

@ -67,7 +67,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from) template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{ {
float32x2_t r64; float32x2_t r64;
r64 = vld1_f32((float *)&from); r64 = vld1_f32((const float *)&from);
return Packet2cf(vcombine_f32(r64, r64)); return Packet2cf(vcombine_f32(r64, r64));
} }
@ -142,7 +142,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3)); to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
} }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); } template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((const float *)addr); }
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{ {
@ -265,6 +265,8 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
} }
}; };
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{ {
// TODO optimize it for NEON // TODO optimize it for NEON
@ -275,7 +277,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
s = vmulq_f32(b.v, b.v); s = vmulq_f32(b.v, b.v);
rev_s = vrev64q_f32(s); rev_s = vrev64q_f32(s);
return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s))); return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
} }
EIGEN_DEVICE_FUNC inline void EIGEN_DEVICE_FUNC inline void
@ -381,7 +383,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); } template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((const double *)addr); }
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride) template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
{ {
@ -456,6 +458,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
} }
}; };
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{ {
// TODO optimize it for NEON // TODO optimize it for NEON

View file

@ -36,12 +36,43 @@ namespace internal {
#endif #endif
#endif #endif
#if EIGEN_COMP_MSVC
// In MSVC's arm_neon.h header file, all NEON vector types
// are aliases to the same underlying type __n128.
// We thus have to wrap them to make them different C++ types.
// (See also bug 1428)
template<typename T,int unique_id>
struct eigen_packet_wrapper
{
operator T&() { return m_val; }
operator const T&() const { return m_val; }
eigen_packet_wrapper() {}
eigen_packet_wrapper(const T &v) : m_val(v) {}
eigen_packet_wrapper& operator=(const T &v) {
m_val = v;
return *this;
}
T m_val;
};
typedef eigen_packet_wrapper<float32x2_t,0> Packet2f;
typedef eigen_packet_wrapper<float32x4_t,1> Packet4f;
typedef eigen_packet_wrapper<int32x4_t ,2> Packet4i;
typedef eigen_packet_wrapper<int32x2_t ,3> Packet2i;
typedef eigen_packet_wrapper<uint32x4_t ,4> Packet4ui;
#else
typedef float32x2_t Packet2f; typedef float32x2_t Packet2f;
typedef float32x4_t Packet4f; typedef float32x4_t Packet4f;
typedef int32x4_t Packet4i; typedef int32x4_t Packet4i;
typedef int32x2_t Packet2i; typedef int32x2_t Packet2i;
typedef uint32x4_t Packet4ui; typedef uint32x4_t Packet4ui;
#endif // EIGEN_COMP_MSVC
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
const Packet4f p4f_##NAME = pset1<Packet4f>(X) const Packet4f p4f_##NAME = pset1<Packet4f>(X)
@ -51,14 +82,17 @@ typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \ #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i p4i_##NAME = pset1<Packet4i>(X) const Packet4i p4i_##NAME = pset1<Packet4i>(X)
// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function #if EIGEN_ARCH_ARM64
// which available on LLVM and GCC (at least) // __builtin_prefetch tends to do nothing on ARM64 compilers because the
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC // prefetch instructions there are too detailed for __builtin_prefetch to map
// meaningfully to them.
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__("prfm pldl1keep, [%[addr]]\n" ::[addr] "r"(ADDR) : );
#elif EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
#define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
#elif defined __pld #elif defined __pld
#define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
#elif !EIGEN_ARCH_ARM64 #elif EIGEN_ARCH_ARM32
#define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ("pld [%[addr]]\n" :: [addr] "r" (ADDR) : );
#else #else
// by default no explicit prefetching // by default no explicit prefetching
#define EIGEN_ARM_PREFETCH(ADDR) #define EIGEN_ARM_PREFETCH(ADDR)
@ -113,7 +147,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from)
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
{ {
const float32_t f[] = {0, 1, 2, 3}; const float f[] = {0, 1, 2, 3};
Packet4f countdown = vld1q_f32(f); Packet4f countdown = vld1q_f32(f);
return vaddq_f32(pset1<Packet4f>(a), countdown); return vaddq_f32(pset1<Packet4f>(a), countdown);
} }

View file

@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3))); _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
} }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{ {
@ -229,23 +229,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
} }
}; };
template<> struct conj_helper<Packet4f, Packet2cf, false,false> EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
{ return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
};
template<> struct conj_helper<Packet2cf, Packet4f, false,false>
{
EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
{ return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{ {
@ -340,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); } template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{ {
@ -430,23 +414,7 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
} }
}; };
template<> struct conj_helper<Packet2d, Packet1cd, false,false> EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
{ return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
};
template<> struct conj_helper<Packet1cd, Packet2d, false,false>
{
EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
{ return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
};
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{ {

View file

@ -409,10 +409,16 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double&
pstore(to, Packet2d(vec2d_swizzle1(pa,0,0))); pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
} }
#if EIGEN_COMP_PGI
typedef const void * SsePrefetchPtrType;
#else
typedef const char * SsePrefetchPtrType;
#endif
#ifndef EIGEN_VECTORIZE_AVX #ifndef EIGEN_VECTORIZE_AVX
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
#endif #endif
#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64 #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
@ -876,4 +882,14 @@ template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, co
} // end namespace Eigen } // end namespace Eigen
#if EIGEN_COMP_PGI
// PGI++ does not define the following intrinsics in C++ mode.
static inline __m128 _mm_castpd_ps (__m128d x) { return reinterpret_cast<__m128&>(x); }
static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); }
static inline __m128d _mm_castps_pd (__m128 x) { return reinterpret_cast<__m128d&>(x); }
static inline __m128i _mm_castps_si128(__m128 x) { return reinterpret_cast<__m128i&>(x); }
static inline __m128 _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x); }
static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); }
#endif
#endif // EIGEN_PACKET_MATH_SSE_H #endif // EIGEN_PACKET_MATH_SSE_H

View file

@ -14,6 +14,7 @@ namespace Eigen {
namespace internal { namespace internal {
#ifndef EIGEN_VECTORIZE_AVX
template <> template <>
struct type_casting_traits<float, int> { struct type_casting_traits<float, int> {
enum { enum {
@ -23,11 +24,6 @@ struct type_casting_traits<float, int> {
}; };
}; };
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
return _mm_cvttps_epi32(a);
}
template <> template <>
struct type_casting_traits<int, float> { struct type_casting_traits<int, float> {
enum { enum {
@ -37,11 +33,6 @@ struct type_casting_traits<int, float> {
}; };
}; };
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
return _mm_cvtepi32_ps(a);
}
template <> template <>
struct type_casting_traits<double, float> { struct type_casting_traits<double, float> {
enum { enum {
@ -51,10 +42,6 @@ struct type_casting_traits<double, float> {
}; };
}; };
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
}
template <> template <>
struct type_casting_traits<float, double> { struct type_casting_traits<float, double> {
enum { enum {
@ -63,6 +50,19 @@ struct type_casting_traits<float, double> {
TgtCoeffRatio = 2 TgtCoeffRatio = 2
}; };
}; };
#endif
template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
return _mm_cvttps_epi32(a);
}
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
return _mm_cvtepi32_ps(a);
}
template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
}
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) { template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
// Simply discard the second half of the input // Simply discard the second half of the input

View file

@ -336,6 +336,9 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
} }
}; };
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{ {
// TODO optimize it for AltiVec // TODO optimize it for AltiVec

View file

@ -255,7 +255,7 @@ struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,Rh
/** \internal /** \internal
* \brief Template functor to compute the hypot of two scalars * \brief Template functor to compute the hypot of two \b positive \b and \b real scalars
* *
* \sa MatrixBase::stableNorm(), class Redux * \sa MatrixBase::stableNorm(), class Redux
*/ */
@ -263,22 +263,15 @@ template<typename Scalar>
struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar> struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
{ {
EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
// typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const
{ {
EIGEN_USING_STD_MATH(sqrt) // This functor is used by hypotNorm only for which it is faster to first apply abs
Scalar p, qp; // on all coefficients prior to reduction through hypot.
if(_x>_y) // This way we avoid calling abs on positive and real entries, and this also permits
{ // to seamlessly handle complexes. Otherwise we would have to handle both real and complexes
p = _x; // through the same functor...
qp = _y / p; return internal::positive_real_hypot(x,y);
}
else
{
p = _y;
qp = _x / p;
}
return p * sqrt(Scalar(1) + qp*qp);
} }
}; };
template<typename Scalar> template<typename Scalar>

View file

@ -44,16 +44,16 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
{ {
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
m_interPacket(plset<Packet>(0)),
m_flip(numext::abs(high)<numext::abs(low)) m_flip(numext::abs(high)<numext::abs(low))
{} {}
template<typename IndexType> template<typename IndexType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const {
typedef typename NumTraits<Scalar>::Real RealScalar;
if(m_flip) if(m_flip)
return (i==0)? m_low : (m_high - (m_size1-i)*m_step); return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step);
else else
return (i==m_size1)? m_high : (m_low + i*m_step); return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step);
} }
template<typename IndexType> template<typename IndexType>
@ -63,7 +63,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
if(m_flip) if(m_flip)
{ {
Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket); Packet pi = plset<Packet>(Scalar(i-m_size1));
Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi)); Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
if(i==0) if(i==0)
res = pinsertfirst(res, m_low); res = pinsertfirst(res, m_low);
@ -71,7 +71,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
} }
else else
{ {
Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket); Packet pi = plset<Packet>(Scalar(i));
Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi)); Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
if(i==m_size1-unpacket_traits<Packet>::size+1) if(i==m_size1-unpacket_traits<Packet>::size+1)
res = pinsertlast(res, m_high); res = pinsertlast(res, m_high);
@ -83,7 +83,6 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false>
const Scalar m_high; const Scalar m_high;
const Index m_size1; const Index m_size1;
const Scalar m_step; const Scalar m_step;
const Packet m_interPacket;
const bool m_flip; const bool m_flip;
}; };

View file

@ -83,13 +83,17 @@ struct functor_traits<std::binder1st<T> >
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; }; { enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
#endif #endif
#if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910)
// std::unary_negate is deprecated since c++17 and will be removed in c++20
template<typename T> template<typename T>
struct functor_traits<std::unary_negate<T> > struct functor_traits<std::unary_negate<T> >
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; }; { enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
// std::binary_negate is deprecated since c++17 and will be removed in c++20
template<typename T> template<typename T>
struct functor_traits<std::binary_negate<T> > struct functor_traits<std::binary_negate<T> >
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; }; { enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
#endif
#ifdef EIGEN_STDEXT_SUPPORT #ifdef EIGEN_STDEXT_SUPPORT

View file

@ -269,10 +269,13 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
enum { enum {
IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0, IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0 RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0,
SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0
}; };
Index size = mat.cols(); Index size = mat.cols();
if(SkipDiag)
size--;
Index depth = actualLhs.cols(); Index depth = actualLhs.cols();
typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar, typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
@ -283,10 +286,11 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
internal::general_matrix_matrix_triangular_product<Index, internal::general_matrix_matrix_triangular_product<Index,
typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
IsRowMajor ? RowMajor : ColMajor, UpLo> IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)>
::run(size, depth, ::run(size, depth,
&actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(),
mat.data(), mat.outerStride(), actualAlpha, blocking); &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(),
mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking);
} }
}; };
@ -294,6 +298,7 @@ template<typename MatrixType, unsigned int UpLo>
template<typename ProductType> template<typename ProductType>
TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta)
{ {
EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED);
eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols());
general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta); general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta);

View file

@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \ const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \
{ \ { \
if (lhs==rhs) { \ if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \
general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \ general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \
::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \
} else { \ } else { \
@ -88,7 +88,7 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \ BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \ char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \
EIGTYPE beta(1); \ EIGTYPE beta(1); \
BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \ BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \
} \ } \
}; };
@ -125,9 +125,13 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
} \ } \
}; };
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk)
EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk)
#else
EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_) EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk_) EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk_)
#endif
// TODO hanlde complex cases // TODO hanlde complex cases
// EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_) // EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)

View file

@ -46,7 +46,7 @@ namespace internal {
// gemm specialization // gemm specialization
#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \ #define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASFUNC) \
template< \ template< \
typename Index, \ typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
@ -100,13 +100,20 @@ static void run(Index rows, Index cols, Index depth, \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \ } else b = _rhs; \
\ \
BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
}}; }};
GEMM_SPECIALIZATION(double, d, double, d) #ifdef EIGEN_USE_MKL
GEMM_SPECIALIZATION(float, f, float, s) GEMM_SPECIALIZATION(double, d, double, dgemm)
GEMM_SPECIALIZATION(dcomplex, cd, double, z) GEMM_SPECIALIZATION(float, f, float, sgemm)
GEMM_SPECIALIZATION(scomplex, cf, float, c) GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm)
GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, cgemm)
#else
GEMM_SPECIALIZATION(double, d, double, dgemm_)
GEMM_SPECIALIZATION(float, f, float, sgemm_)
GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_)
GEMM_SPECIALIZATION(scomplex, cf, float, cgemm_)
#endif
} // end namespase internal } // end namespase internal

View file

@ -183,8 +183,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
alignmentPattern = AllAligned; alignmentPattern = AllAligned;
} }
const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
const Index offset3 = (FirstAligned && alignmentStep==1)?1:3; const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce) for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
@ -457,8 +457,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
alignmentPattern = AllAligned; alignmentPattern = AllAligned;
} }
const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
const Index offset3 = (FirstAligned && alignmentStep==1)?1:3; const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce) for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)

View file

@ -85,7 +85,7 @@ EIGEN_BLAS_GEMV_SPECIALIZE(float)
EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex) EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
EIGEN_BLAS_GEMV_SPECIALIZE(scomplex) EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \ #define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \ template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \ struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
{ \ { \
@ -113,14 +113,21 @@ static void run( \
x_ptr=x_tmp.data(); \ x_ptr=x_tmp.data(); \
incx=1; \ incx=1; \
} else x_ptr=rhs; \ } else x_ptr=rhs; \
BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \ BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\ }\
}; };
EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s) EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv)
EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z) EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv)
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c) EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv)
#else
EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_)
EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_)
EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_)
#endif
} // end namespase internal } // end namespase internal

View file

@ -40,7 +40,7 @@ namespace internal {
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ /* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ #define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \ template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -81,13 +81,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \ } else b = _rhs; \
\ \
BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\ \
} \ } \
}; };
#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ #define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \ template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -144,20 +144,26 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \ } \
\ \
BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\ \
} \ } \
}; };
EIGEN_BLAS_SYMM_L(double, double, d, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_SYMM_L(float, float, f, s) EIGEN_BLAS_SYMM_L(double, double, d, dsymm)
EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z) EIGEN_BLAS_SYMM_L(float, float, f, ssymm)
EIGEN_BLAS_HEMM_L(scomplex, float, cf, c) EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm)
EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm)
#else
EIGEN_BLAS_SYMM_L(double, double, d, dsymm_)
EIGEN_BLAS_SYMM_L(float, float, f, ssymm_)
EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_)
EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
#endif
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ /* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ #define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \ template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -197,13 +203,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _lhs; \ } else b = _lhs; \
\ \
BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\ \
} \ } \
}; };
#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ #define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \ template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -259,15 +265,21 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \ } \
\ \
BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
} \ } \
}; };
EIGEN_BLAS_SYMM_R(double, double, d, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_SYMM_R(float, float, f, s) EIGEN_BLAS_SYMM_R(double, double, d, dsymm)
EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z) EIGEN_BLAS_SYMM_R(float, float, f, ssymm)
EIGEN_BLAS_HEMM_R(scomplex, float, cf, c) EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm)
EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm)
#else
EIGEN_BLAS_SYMM_R(double, double, d, dsymm_)
EIGEN_BLAS_SYMM_R(float, float, f, ssymm_)
EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_)
EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_)
#endif
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen

View file

@ -95,14 +95,21 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
x_tmp=map_x.conjugate(); \ x_tmp=map_x.conjugate(); \
x_ptr=x_tmp.data(); \ x_ptr=x_tmp.data(); \
} else x_ptr=_rhs; \ } else x_ptr=_rhs; \
BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \ BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\ }\
}; };
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv)
EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv)
EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
#else
EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_) EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_) EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_) EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_) EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_)
#endif
} // end namespace internal } // end namespace internal

View file

@ -137,7 +137,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert())); // To work around an "error: member reference base type 'Matrix<...>
// (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
// not a structure or union" compilation error in nvcc (tested V8.0.61),
// create a dummy internal::constructor_without_unaligned_array_assert
// object to pass to the Matrix constructor.
internal::constructor_without_unaligned_array_assert a;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
triangularBuffer.setZero(); triangularBuffer.setZero();
if((Mode&ZeroDiag)==ZeroDiag) if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero(); triangularBuffer.diagonal().setZero();
@ -284,7 +290,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert())); internal::constructor_without_unaligned_array_assert a;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);
triangularBuffer.setZero(); triangularBuffer.setZero();
if((Mode&ZeroDiag)==ZeroDiag) if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero(); triangularBuffer.diagonal().setZero();
@ -393,6 +400,8 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
{ {
template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha) template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
{ {
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar Scalar; typedef typename Dest::Scalar Scalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits; typedef internal::blas_traits<Lhs> LhsBlasTraits;
@ -405,8 +414,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
* RhsBlasTraits::extractScalarFactor(a_rhs); RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
@ -431,6 +441,21 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
&dst.coeffRef(0,0), dst.outerStride(), // result info &dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha, blocking actualAlpha, blocking
); );
// Apply correction if the diagonal is unit and a scalar factor was nested:
if ((Mode&UnitDiag)==UnitDiag)
{
if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
}
else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
{
Index diagSize = (std::min)(rhs.rows(),rhs.cols());
dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
}
}
} }
}; };

View file

@ -75,7 +75,7 @@ EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false) EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)
// implements col-major += alpha * op(triangular) * op(general) // implements col-major += alpha * op(triangular) * op(general)
#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ #define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \ template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -172,7 +172,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
} \ } \
/*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \ /*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \ /* call ?trmm*/ \
BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \ BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\ \
/* Add op(a_triangular)*b into res*/ \ /* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@ -180,13 +180,20 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
} \ } \
}; };
EIGEN_BLAS_TRMM_L(double, double, d, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z) EIGEN_BLAS_TRMM_L(double, double, d, dtrmm)
EIGEN_BLAS_TRMM_L(float, float, f, s) EIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm)
EIGEN_BLAS_TRMM_L(scomplex, float, cf, c) EIGEN_BLAS_TRMM_L(float, float, f, strmm)
EIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8, cf, ctrmm)
#else
EIGEN_BLAS_TRMM_L(double, double, d, dtrmm_)
EIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_)
EIGEN_BLAS_TRMM_L(float, float, f, strmm_)
EIGEN_BLAS_TRMM_L(scomplex, float, cf, ctrmm_)
#endif
// implements col-major += alpha * op(general) * op(triangular) // implements col-major += alpha * op(general) * op(triangular)
#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ #define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \ template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \ int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \ int RhsStorageOrder, bool ConjugateRhs> \
@ -282,7 +289,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
} \ } \
/*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \ /*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \ /* call ?trmm*/ \
BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \ BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\ \
/* Add op(a_triangular)*b into res*/ \ /* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@ -290,11 +297,17 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
} \ } \
}; };
EIGEN_BLAS_TRMM_R(double, double, d, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z) EIGEN_BLAS_TRMM_R(double, double, d, dtrmm)
EIGEN_BLAS_TRMM_R(float, float, f, s) EIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm)
EIGEN_BLAS_TRMM_R(scomplex, float, cf, c) EIGEN_BLAS_TRMM_R(float, float, f, strmm)
EIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8, cf, ctrmm)
#else
EIGEN_BLAS_TRMM_R(double, double, d, dtrmm_)
EIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_)
EIGEN_BLAS_TRMM_R(float, float, f, strmm_)
EIGEN_BLAS_TRMM_R(scomplex, float, cf, ctrmm_)
#endif
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen

View file

@ -221,8 +221,9 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
* RhsBlasTraits::extractScalarFactor(rhs); RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
enum { enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
@ -274,6 +275,12 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
else else
dest = MappedDest(actualDestPtr, dest.size()); dest = MappedDest(actualDestPtr, dest.size());
} }
if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
}
} }
}; };
@ -295,8 +302,9 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
* RhsBlasTraits::extractScalarFactor(rhs); RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
enum { enum {
DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
@ -326,6 +334,12 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
actualRhsPtr,1, actualRhsPtr,1,
dest.data(),dest.innerStride(), dest.data(),dest.innerStride(),
actualAlpha); actualAlpha);
if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
}
} }
}; };

View file

@ -71,7 +71,7 @@ EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
EIGEN_BLAS_TRMV_SPECIALIZE(scomplex) EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
// implements col-major: res += alpha * op(triangular) * vector // implements col-major: res += alpha * op(triangular) * vector
#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ #define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
enum { \ enum { \
@ -121,10 +121,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \ diag = IsUnitDiag ? 'U' : 'N'; \
\ \
/* call ?TRMV*/ \ /* call ?TRMV*/ \
BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \ BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\ \
/* Add op(a_tr)rhs into res*/ \ /* Add op(a_tr)rhs into res*/ \
BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \ BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \ /* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
if (size<(std::max)(rows,cols)) { \ if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@ -142,18 +142,25 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
m = convert_index<BlasIndex>(size); \ m = convert_index<BlasIndex>(size); \
n = convert_index<BlasIndex>(cols-size); \ n = convert_index<BlasIndex>(cols-size); \
} \ } \
BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \ } \
} \ } \
}; };
EIGEN_BLAS_TRMV_CM(double, double, d, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z) EIGEN_BLAS_TRMV_CM(double, double, d, d,)
EIGEN_BLAS_TRMV_CM(float, float, f, s) EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,)
EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c) EIGEN_BLAS_TRMV_CM(float, float, f, s,)
EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8, cf, c,)
#else
EIGEN_BLAS_TRMV_CM(double, double, d, d, _)
EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _)
EIGEN_BLAS_TRMV_CM(float, float, f, s, _)
EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c, _)
#endif
// implements row-major: res += alpha * op(triangular) * vector // implements row-major: res += alpha * op(triangular) * vector
#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ #define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
enum { \ enum { \
@ -203,10 +210,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \ diag = IsUnitDiag ? 'U' : 'N'; \
\ \
/* call ?TRMV*/ \ /* call ?TRMV*/ \
BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \ BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\ \
/* Add op(a_tr)rhs into res*/ \ /* Add op(a_tr)rhs into res*/ \
BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \ BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \ /* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
if (size<(std::max)(rows,cols)) { \ if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@ -224,15 +231,22 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
m = convert_index<BlasIndex>(size); \ m = convert_index<BlasIndex>(size); \
n = convert_index<BlasIndex>(cols-size); \ n = convert_index<BlasIndex>(cols-size); \
} \ } \
BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \ } \
} \ } \
}; };
EIGEN_BLAS_TRMV_RM(double, double, d, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z) EIGEN_BLAS_TRMV_RM(double, double, d, d,)
EIGEN_BLAS_TRMV_RM(float, float, f, s) EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,)
EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c) EIGEN_BLAS_TRMV_RM(float, float, f, s,)
EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8, cf, c,)
#else
EIGEN_BLAS_TRMV_RM(double, double, d, d,_)
EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_)
EIGEN_BLAS_TRMV_RM(float, float, f, s,_)
EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c,_)
#endif
} // end namespase internal } // end namespase internal

View file

@ -38,7 +38,7 @@ namespace Eigen {
namespace internal { namespace internal {
// implements LeftSide op(triangular)^-1 * general // implements LeftSide op(triangular)^-1 * general
#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \ #define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \ { \
@ -80,18 +80,24 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
} \ } \
if (IsUnitDiag) diag='U'; \ if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \ /* call ?trsm*/ \
BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \ BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
} \ } \
}; };
EIGEN_BLAS_TRSM_L(double, double, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRSM_L(dcomplex, double, z) EIGEN_BLAS_TRSM_L(double, double, dtrsm)
EIGEN_BLAS_TRSM_L(float, float, s) EIGEN_BLAS_TRSM_L(dcomplex, MKL_Complex16, ztrsm)
EIGEN_BLAS_TRSM_L(scomplex, float, c) EIGEN_BLAS_TRSM_L(float, float, strsm)
EIGEN_BLAS_TRSM_L(scomplex, MKL_Complex8, ctrsm)
#else
EIGEN_BLAS_TRSM_L(double, double, dtrsm_)
EIGEN_BLAS_TRSM_L(dcomplex, double, ztrsm_)
EIGEN_BLAS_TRSM_L(float, float, strsm_)
EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_)
#endif
// implements RightSide general * op(triangular)^-1 // implements RightSide general * op(triangular)^-1
#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \ #define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \ { \
@ -133,16 +139,22 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
} \ } \
if (IsUnitDiag) diag='U'; \ if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \ /* call ?trsm*/ \
BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \ BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
/*std::cout << "TRMS_L specialization!\n";*/ \ /*std::cout << "TRMS_L specialization!\n";*/ \
} \ } \
}; };
EIGEN_BLAS_TRSM_R(double, double, d) #ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRSM_R(dcomplex, double, z) EIGEN_BLAS_TRSM_R(double, double, dtrsm)
EIGEN_BLAS_TRSM_R(float, float, s) EIGEN_BLAS_TRSM_R(dcomplex, MKL_Complex16, ztrsm)
EIGEN_BLAS_TRSM_R(scomplex, float, c) EIGEN_BLAS_TRSM_R(float, float, strsm)
EIGEN_BLAS_TRSM_R(scomplex, MKL_Complex8, ctrsm)
#else
EIGEN_BLAS_TRSM_R(double, double, dtrsm_)
EIGEN_BLAS_TRSM_R(dcomplex, double, ztrsm_)
EIGEN_BLAS_TRSM_R(float, float, strsm_)
EIGEN_BLAS_TRSM_R(scomplex, float, ctrsm_)
#endif
} // end namespace internal } // end namespace internal

View file

@ -49,10 +49,11 @@
#define EIGEN_USE_LAPACKE #define EIGEN_USE_LAPACKE
#endif #endif
#if defined(EIGEN_USE_MKL_VML) #if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL)
#define EIGEN_USE_MKL #define EIGEN_USE_MKL
#endif #endif
#if defined EIGEN_USE_MKL #if defined EIGEN_USE_MKL
# include <mkl.h> # include <mkl.h>
/*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/ /*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/
@ -108,6 +109,10 @@
#endif #endif
#endif #endif
#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL)
#include "../../misc/blas.h"
#endif
namespace Eigen { namespace Eigen {
typedef std::complex<double> dcomplex; typedef std::complex<double> dcomplex;
@ -121,8 +126,5 @@ typedef int BlasIndex;
} // end namespace Eigen } // end namespace Eigen
#if defined(EIGEN_USE_BLAS)
#include "../../misc/blas.h"
#endif
#endif // EIGEN_MKL_SUPPORT_H #endif // EIGEN_MKL_SUPPORT_H

View file

@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3 #define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 3 #define EIGEN_MAJOR_VERSION 3
#define EIGEN_MINOR_VERSION 3 #define EIGEN_MINOR_VERSION 5
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@ -399,7 +399,7 @@
// Does the compiler support variadic templates? // Does the compiler support variadic templates?
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES #ifndef EIGEN_HAS_VARIADIC_TEMPLATES
#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
&& ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) )
// ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
// this prevents nvcc from crashing when compiling Eigen on Tegra X1 // this prevents nvcc from crashing when compiling Eigen on Tegra X1
#define EIGEN_HAS_VARIADIC_TEMPLATES 1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1
@ -413,7 +413,7 @@
#ifdef __CUDACC__ #ifdef __CUDACC__
// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500))
#define EIGEN_HAS_CONSTEXPR 1 #define EIGEN_HAS_CONSTEXPR 1
#endif #endif
#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
@ -487,11 +487,13 @@
// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC, // EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline // but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
// but GCC is still doing fine with just inline. // but GCC is still doing fine with just inline.
#ifndef EIGEN_STRONG_INLINE
#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC #if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
#define EIGEN_STRONG_INLINE __forceinline #define EIGEN_STRONG_INLINE __forceinline
#else #else
#define EIGEN_STRONG_INLINE inline #define EIGEN_STRONG_INLINE inline
#endif #endif
#endif
// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible // EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible
// attribute to maximize inlining. This should only be used when really necessary: in particular, // attribute to maximize inlining. This should only be used when really necessary: in particular,
@ -812,7 +814,8 @@ namespace Eigen {
// just an empty macro ! // just an empty macro !
#define EIGEN_EMPTY #define EIGEN_EMPTY
#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) #if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
// for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =; using Base::operator =;
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
@ -986,7 +989,13 @@ namespace Eigen {
# define EIGEN_NOEXCEPT # define EIGEN_NOEXCEPT
# define EIGEN_NOEXCEPT_IF(x) # define EIGEN_NOEXCEPT_IF(x)
# define EIGEN_NO_THROW throw() # define EIGEN_NO_THROW throw()
# if EIGEN_COMP_MSVC
// MSVC does not support exception specifications (warning C4290),
// and they are deprecated in c++11 anyway.
# define EIGEN_EXCEPTION_SPEC(X) throw()
# else
# define EIGEN_EXCEPTION_SPEC(X) throw(X) # define EIGEN_EXCEPTION_SPEC(X) throw(X)
# endif # endif
#endif
#endif // EIGEN_MACROS_H #endif // EIGEN_MACROS_H

View file

@ -70,7 +70,7 @@ inline void throw_std_bad_alloc()
throw std::bad_alloc(); throw std::bad_alloc();
#else #else
std::size_t huge = static_cast<std::size_t>(-1); std::size_t huge = static_cast<std::size_t>(-1);
new int[huge]; ::operator new(huge);
#endif #endif
} }
@ -493,7 +493,7 @@ template<typename T> struct smart_copy_helper<T,true> {
IntPtr size = IntPtr(end)-IntPtr(start); IntPtr size = IntPtr(end)-IntPtr(start);
if(size==0) return; if(size==0) return;
eigen_internal_assert(start!=0 && end!=0 && target!=0); eigen_internal_assert(start!=0 && end!=0 && target!=0);
memcpy(target, start, size); std::memcpy(target, start, size);
} }
}; };
@ -696,7 +696,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
/** \class aligned_allocator /** \class aligned_allocator
* \ingroup Core_Module * \ingroup Core_Module
* *
* \brief STL compatible allocator to use with with 16 byte aligned types * \brief STL compatible allocator to use with types requiring a non standrad alignment.
*
* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
* By default, it will thus provide at least 16 bytes alignment and more in following cases:
* - 32 bytes alignment if AVX is enabled.
* - 64 bytes alignment if AVX512 is enabled.
*
* This can be controled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
* \link TopicPreprocessorDirectivesPerformance there \endlink.
* *
* Example: * Example:
* \code * \code

View file

@ -485,6 +485,26 @@ T div_ceil(const T &a, const T &b)
return (a+b-1) / b; return (a+b-1) / b;
} }
// The aim of the following functions is to bypass -Wfloat-equal warnings
// when we really want a strict equality comparison on floating points.
template<typename X, typename Y> EIGEN_STRONG_INLINE
bool equal_strict(const X& x,const Y& y) { return x == y; }
template<> EIGEN_STRONG_INLINE
bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }
template<> EIGEN_STRONG_INLINE
bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }
template<typename X, typename Y> EIGEN_STRONG_INLINE
bool not_equal_strict(const X& x,const Y& y) { return x != y; }
template<> EIGEN_STRONG_INLINE
bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }
template<> EIGEN_STRONG_INLINE
bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
} // end namespace numext } // end namespace numext
} // end namespace Eigen } // end namespace Eigen

View file

@ -24,6 +24,7 @@
* *
*/ */
#ifndef EIGEN_STATIC_ASSERT
#ifndef EIGEN_NO_STATIC_ASSERT #ifndef EIGEN_NO_STATIC_ASSERT
#if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)) #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600))
@ -44,64 +45,65 @@
struct static_assertion<true> struct static_assertion<true>
{ {
enum { enum {
YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX, YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1,
YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES, YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1,
YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1,
THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE, THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1,
THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE, THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1,
THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE, THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1,
OUT_OF_RANGE_ACCESS, OUT_OF_RANGE_ACCESS=1,
YOU_MADE_A_PROGRAMMING_MISTAKE, YOU_MADE_A_PROGRAMMING_MISTAKE=1,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1,
EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE, EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1,
YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR, YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1,
YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR, YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1,
UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC, UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1,
THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1,
FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED, FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1,
NUMERIC_TYPE_MUST_BE_REAL, NUMERIC_TYPE_MUST_BE_REAL=1,
COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED, COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1,
WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1,
THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE, THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1,
INVALID_MATRIX_PRODUCT, INVALID_MATRIX_PRODUCT=1,
INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS, INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1,
INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION, INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1,
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY, YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1,
THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1,
THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES, THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1,
INVALID_MATRIX_TEMPLATE_PARAMETERS, INVALID_MATRIX_TEMPLATE_PARAMETERS=1,
INVALID_MATRIXBASE_TEMPLATE_PARAMETERS, INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1,
BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER, BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1,
THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX, THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1,
THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE, THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1,
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES, THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1,
YOU_ALREADY_SPECIFIED_THIS_STRIDE, YOU_ALREADY_SPECIFIED_THIS_STRIDE=1,
INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION, INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1,
THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD, THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1,
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1, PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1,
THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS, THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1,
YOU_CANNOT_MIX_ARRAYS_AND_MATRICES, YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1,
YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION, YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1,
THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY, THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1,
YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT, YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1,
THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS, THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1,
THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1,
THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL, THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1,
THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES, THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1,
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED, YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1,
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED, YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1,
THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE, THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1,
THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1,
OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1,
IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1,
STORAGE_LAYOUT_DOES_NOT_MATCH, STORAGE_LAYOUT_DOES_NOT_MATCH=1,
EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1,
THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1,
MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1,
THIS_TYPE_IS_NOT_SUPPORTED, THIS_TYPE_IS_NOT_SUPPORTED=1,
STORAGE_KIND_MUST_MATCH, STORAGE_KIND_MUST_MATCH=1,
STORAGE_INDEX_MUST_MATCH, STORAGE_INDEX_MUST_MATCH=1,
CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1,
SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1
}; };
}; };
@ -131,7 +133,7 @@
#define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG); #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);
#endif // EIGEN_NO_STATIC_ASSERT #endif // EIGEN_NO_STATIC_ASSERT
#endif // EIGEN_STATIC_ASSERT
// static assertion failing if the type \a TYPE is not a vector type // static assertion failing if the type \a TYPE is not a vector type
#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \ #define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \

View file

@ -311,7 +311,6 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
// Aliases: // Aliases:
Map<VectorType> v(reinterpret_cast<Scalar*>(m_tmp.data()), size); Map<VectorType> v(reinterpret_cast<Scalar*>(m_tmp.data()), size);
ComplexVectorType &cv = m_tmp; ComplexVectorType &cv = m_tmp;
const MatrixType &mZ = m_realQZ.matrixZ();
const MatrixType &mS = m_realQZ.matrixS(); const MatrixType &mS = m_realQZ.matrixS();
const MatrixType &mT = m_realQZ.matrixT(); const MatrixType &mT = m_realQZ.matrixT();
@ -351,7 +350,7 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
} }
} }
} }
m_eivec.col(i).real().noalias() = mZ.transpose() * v; m_eivec.col(i).real().noalias() = m_realQZ.matrixZ().transpose() * v;
m_eivec.col(i).real().normalize(); m_eivec.col(i).real().normalize();
m_eivec.col(i).imag().setConstant(0); m_eivec.col(i).imag().setConstant(0);
} }
@ -400,7 +399,7 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
/ (alpha*mT.coeffRef(j,j) - static_cast<Scalar>(beta*mS.coeffRef(j,j))); / (alpha*mT.coeffRef(j,j) - static_cast<Scalar>(beta*mS.coeffRef(j,j)));
} }
} }
m_eivec.col(i+1).noalias() = (mZ.transpose() * cv); m_eivec.col(i+1).noalias() = (m_realQZ.matrixZ().transpose() * cv);
m_eivec.col(i+1).normalize(); m_eivec.col(i+1).normalize();
m_eivec.col(i) = m_eivec.col(i+1).conjugate(); m_eivec.col(i) = m_eivec.col(i+1).conjugate();
} }

View file

@ -303,7 +303,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
Scalar exshift(0); // sum of exceptional shifts Scalar exshift(0); // sum of exceptional shifts
Scalar norm = computeNormOfT(); Scalar norm = computeNormOfT();
if(norm!=0) if(norm!=Scalar(0))
{ {
while (iu >= 0) while (iu >= 0)
{ {
@ -327,7 +327,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa
else // No convergence yet else // No convergence yet
{ {
// The firstHouseholderVector vector has to be initialized to something to get rid of a silly GCC warning (-O1 -Wall -DNDEBUG ) // The firstHouseholderVector vector has to be initialized to something to get rid of a silly GCC warning (-O1 -Wall -DNDEBUG )
Vector3s firstHouseholderVector(0,0,0), shiftInfo; Vector3s firstHouseholderVector = Vector3s::Zero(), shiftInfo;
computeShift(iu, iter, exshift, shiftInfo); computeShift(iu, iter, exshift, shiftInfo);
iter = iter + 1; iter = iter + 1;
totalIter = totalIter + 1; totalIter = totalIter + 1;

View file

@ -37,7 +37,7 @@ namespace Eigen {
/** \internal Specialization for the data types supported by LAPACKe */ /** \internal Specialization for the data types supported by LAPACKe */
#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW, LAPACKE_COLROW ) \ #define EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW ) \
template<> template<typename InputType> inline \ template<> template<typename InputType> inline \
SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \
SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \
@ -47,7 +47,7 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
&& (options&EigVecMask)!=EigVecMask \ && (options&EigVecMask)!=EigVecMask \
&& "invalid option parameter"); \ && "invalid option parameter"); \
bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \ bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \
lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, matrix_order, info; \ lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, info; \
m_eivalues.resize(n,1); \ m_eivalues.resize(n,1); \
m_subdiag.resize(n-1); \ m_subdiag.resize(n-1); \
m_eivec = matrix; \ m_eivec = matrix; \
@ -63,27 +63,24 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c
} \ } \
\ \
lda = internal::convert_index<lapack_int>(m_eivec.outerStride()); \ lda = internal::convert_index<lapack_int>(m_eivec.outerStride()); \
matrix_order=LAPACKE_COLROW; \
char jobz, uplo='L'/*, range='A'*/; \ char jobz, uplo='L'/*, range='A'*/; \
jobz = computeEigenvectors ? 'V' : 'N'; \ jobz = computeEigenvectors ? 'V' : 'N'; \
\ \
info = LAPACKE_##LAPACKE_NAME( matrix_order, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \ info = LAPACKE_##LAPACKE_NAME( LAPACK_COL_MAJOR, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \
m_info = (info==0) ? Success : NoConvergence; \ m_info = (info==0) ? Success : NoConvergence; \
m_isInitialized = true; \ m_isInitialized = true; \
m_eigenvectorsOk = computeEigenvectors; \ m_eigenvectorsOk = computeEigenvectors; \
return *this; \ return *this; \
} }
#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME ) \
EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, ColMajor ) \
EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, RowMajor )
EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, ColMajor, LAPACK_COL_MAJOR) EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev)
EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, ColMajor, LAPACK_COL_MAJOR) EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev)
EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, ColMajor, LAPACK_COL_MAJOR) EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev)
EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, ColMajor, LAPACK_COL_MAJOR) EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev)
EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, RowMajor, LAPACK_ROW_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, RowMajor, LAPACK_ROW_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, RowMajor, LAPACK_ROW_MAJOR)
EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, RowMajor, LAPACK_ROW_MAJOR)
} // end namespace Eigen } // end namespace Eigen

View file

@ -178,7 +178,7 @@ EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const Quaterni
if (n != Scalar(0)) if (n != Scalar(0))
{ {
m_angle = Scalar(2)*atan2(n, abs(q.w())); m_angle = Scalar(2)*atan2(n, abs(q.w()));
if(q.w() < 0) if(q.w() < Scalar(0))
n = -n; n = -n;
m_axis = q.vec() / n; m_axis = q.vec() / n;
} }

View file

@ -43,6 +43,11 @@ class QuaternionBase : public RotationBase<Derived, 3>
typedef typename internal::traits<Derived>::Scalar Scalar; typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename NumTraits<Scalar>::Real RealScalar;
typedef typename internal::traits<Derived>::Coefficients Coefficients; typedef typename internal::traits<Derived>::Coefficients Coefficients;
typedef typename Coefficients::CoeffReturnType CoeffReturnType;
typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit),
Scalar&, CoeffReturnType>::type NonConstCoeffReturnType;
enum { enum {
Flags = Eigen::internal::traits<Derived>::Flags Flags = Eigen::internal::traits<Derived>::Flags
}; };
@ -58,22 +63,22 @@ class QuaternionBase : public RotationBase<Derived, 3>
/** \returns the \c x coefficient */ /** \returns the \c x coefficient */
EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); } EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); }
/** \returns the \c y coefficient */ /** \returns the \c y coefficient */
EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); } EIGEN_DEVICE_FUNC inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); }
/** \returns the \c z coefficient */ /** \returns the \c z coefficient */
EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); } EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); }
/** \returns the \c w coefficient */ /** \returns the \c w coefficient */
EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); } EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); }
/** \returns a reference to the \c x coefficient */ /** \returns a reference to the \c x coefficient (if Derived is a non-const lvalue) */
EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); } EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); }
/** \returns a reference to the \c y coefficient */ /** \returns a reference to the \c y coefficient (if Derived is a non-const lvalue) */
EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); } EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); }
/** \returns a reference to the \c z coefficient */ /** \returns a reference to the \c z coefficient (if Derived is a non-const lvalue) */
EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); } EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); }
/** \returns a reference to the \c w coefficient */ /** \returns a reference to the \c w coefficient (if Derived is a non-const lvalue) */
EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); } EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); }
/** \returns a read-only vector expression of the imaginary part (x,y,z) */ /** \returns a read-only vector expression of the imaginary part (x,y,z) */
EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); } EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
@ -423,7 +428,7 @@ typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd;
// Generic Quaternion * Quaternion product // Generic Quaternion * Quaternion product
// This product can be specialized for a given architecture via the Arch template argument. // This product can be specialized for a given architecture via the Arch template argument.
namespace internal { namespace internal {
template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product template<int Arch, class Derived1, class Derived2, typename Scalar> struct quat_product
{ {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
return Quaternion<Scalar> return Quaternion<Scalar>
@ -446,8 +451,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c
EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value), EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
return internal::quat_product<Architecture::Target, Derived, OtherDerived, return internal::quat_product<Architecture::Target, Derived, OtherDerived,
typename internal::traits<Derived>::Scalar, typename internal::traits<Derived>::Scalar>::run(*this, other);
EIGEN_PLAIN_ENUM_MIN(internal::traits<Derived>::Alignment, internal::traits<OtherDerived>::Alignment)>::run(*this, other);
} }
/** \sa operator*(Quaternion) */ /** \sa operator*(Quaternion) */
@ -672,7 +676,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
// Generic conjugate of a Quaternion // Generic conjugate of a Quaternion
namespace internal { namespace internal {
template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj template<int Arch, class Derived, typename Scalar> struct quat_conj
{ {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z()); return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z());
@ -691,8 +695,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
QuaternionBase<Derived>::conjugate() const QuaternionBase<Derived>::conjugate() const
{ {
return internal::quat_conj<Architecture::Target, Derived, return internal::quat_conj<Architecture::Target, Derived,
typename internal::traits<Derived>::Scalar, typename internal::traits<Derived>::Scalar>::run(*this);
internal::traits<Derived>::Alignment>::run(*this);
} }

View file

@ -16,17 +16,23 @@ namespace Eigen {
namespace internal { namespace internal {
template<class Derived, class OtherDerived> template<class Derived, class OtherDerived>
struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16> struct quat_product<Architecture::SSE, Derived, OtherDerived, float>
{ {
enum {
AAlignment = traits<Derived>::Alignment,
BAlignment = traits<OtherDerived>::Alignment,
ResAlignment = traits<Quaternion<float> >::Alignment
};
static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
{ {
Quaternion<float> res; Quaternion<float> res;
const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f);
__m128 a = _a.coeffs().template packet<Aligned16>(0); __m128 a = _a.coeffs().template packet<AAlignment>(0);
__m128 b = _b.coeffs().template packet<Aligned16>(0); __m128 b = _b.coeffs().template packet<BAlignment>(0);
__m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2));
__m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1));
pstore(&res.x(), pstoret<float,Packet4f,ResAlignment>(
&res.x(),
_mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)),
_mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0),
vec4f_swizzle1(b,1,2,0,0))), vec4f_swizzle1(b,1,2,0,0))),
@ -36,14 +42,17 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16>
} }
}; };
template<class Derived, int Alignment> template<class Derived>
struct quat_conj<Architecture::SSE, Derived, float, Alignment> struct quat_conj<Architecture::SSE, Derived, float>
{ {
enum {
ResAlignment = traits<Quaternion<float> >::Alignment
};
static inline Quaternion<float> run(const QuaternionBase<Derived>& q) static inline Quaternion<float> run(const QuaternionBase<Derived>& q)
{ {
Quaternion<float> res; Quaternion<float> res;
const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f);
pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<Alignment>(0))); pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
return res; return res;
} }
}; };
@ -52,6 +61,9 @@ struct quat_conj<Architecture::SSE, Derived, float, Alignment>
template<typename VectorLhs,typename VectorRhs> template<typename VectorLhs,typename VectorRhs>
struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
{ {
enum {
ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment
};
static inline typename plain_matrix_type<VectorLhs>::type static inline typename plain_matrix_type<VectorLhs>::type
run(const VectorLhs& lhs, const VectorRhs& rhs) run(const VectorLhs& lhs, const VectorRhs& rhs)
{ {
@ -60,7 +72,7 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
__m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3));
__m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3));
typename plain_matrix_type<VectorLhs>::type res; typename plain_matrix_type<VectorLhs>::type res;
pstore(&res.x(),_mm_sub_ps(mul1,mul2)); pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2));
return res; return res;
} }
}; };
@ -68,9 +80,14 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true>
template<class Derived, class OtherDerived, int Alignment> template<class Derived, class OtherDerived>
struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> struct quat_product<Architecture::SSE, Derived, OtherDerived, double>
{ {
enum {
BAlignment = traits<OtherDerived>::Alignment,
ResAlignment = traits<Quaternion<double> >::Alignment
};
static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b)
{ {
const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
@ -78,8 +95,8 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
Quaternion<double> res; Quaternion<double> res;
const double* a = _a.coeffs().data(); const double* a = _a.coeffs().data();
Packet2d b_xy = _b.coeffs().template packet<Alignment>(0); Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0);
Packet2d b_zw = _b.coeffs().template packet<Alignment>(2); Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2);
Packet2d a_xx = pset1<Packet2d>(a[0]); Packet2d a_xx = pset1<Packet2d>(a[0]);
Packet2d a_yy = pset1<Packet2d>(a[1]); Packet2d a_yy = pset1<Packet2d>(a[1]);
Packet2d a_zz = pset1<Packet2d>(a[2]); Packet2d a_zz = pset1<Packet2d>(a[2]);
@ -97,9 +114,9 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw)); t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw));
#ifdef EIGEN_VECTORIZE_SSE3 #ifdef EIGEN_VECTORIZE_SSE3
EIGEN_UNUSED_VARIABLE(mask) EIGEN_UNUSED_VARIABLE(mask)
pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2))); pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2)));
#else #else
pstore(&res.x(), padd(t1, pxor(mask,preverse(t2)))); pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2))));
#endif #endif
/* /*
@ -111,25 +128,28 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment>
t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy)); t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy));
#ifdef EIGEN_VECTORIZE_SSE3 #ifdef EIGEN_VECTORIZE_SSE3
EIGEN_UNUSED_VARIABLE(mask) EIGEN_UNUSED_VARIABLE(mask)
pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2)));
#else #else
pstore(&res.z(), psub(t1, pxor(mask,preverse(t2)))); pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2))));
#endif #endif
return res; return res;
} }
}; };
template<class Derived, int Alignment> template<class Derived>
struct quat_conj<Architecture::SSE, Derived, double, Alignment> struct quat_conj<Architecture::SSE, Derived, double>
{ {
enum {
ResAlignment = traits<Quaternion<double> >::Alignment
};
static inline Quaternion<double> run(const QuaternionBase<Derived>& q) static inline Quaternion<double> run(const QuaternionBase<Derived>& q)
{ {
Quaternion<double> res; Quaternion<double> res;
const __m128d mask0 = _mm_setr_pd(-0.,-0.); const __m128d mask0 = _mm_setr_pd(-0.,-0.);
const __m128d mask2 = _mm_setr_pd(-0.,0.); const __m128d mask2 = _mm_setr_pd(-0.,0.);
pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<Alignment>(0))); pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0)));
pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<Alignment>(2))); pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2)));
return res; return res;
} }
}; };

View file

@ -152,14 +152,29 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar>
{ {
// Compute the inverse squared-norm of each column of mat // Compute the inverse squared-norm of each column of mat
m_invdiag.resize(mat.cols()); m_invdiag.resize(mat.cols());
if(MatType::IsRowMajor)
{
m_invdiag.setZero();
for(Index j=0; j<mat.outerSize(); ++j) for(Index j=0; j<mat.outerSize(); ++j)
{ {
RealScalar sum = mat.innerVector(j).squaredNorm(); for(typename MatType::InnerIterator it(mat,j); it; ++it)
if(sum>0) m_invdiag(it.index()) += numext::abs2(it.value());
}
for(Index j=0; j<mat.cols(); ++j)
if(numext::real(m_invdiag(j))>RealScalar(0))
m_invdiag(j) = RealScalar(1)/numext::real(m_invdiag(j));
}
else
{
for(Index j=0; j<mat.outerSize(); ++j)
{
RealScalar sum = mat.col(j).squaredNorm();
if(sum>RealScalar(0))
m_invdiag(j) = RealScalar(1)/sum; m_invdiag(j) = RealScalar(1)/sum;
else else
m_invdiag(j) = RealScalar(1); m_invdiag(j) = RealScalar(1);
} }
}
Base::m_isInitialized = true; Base::m_isInitialized = true;
return *this; return *this;
} }

View file

@ -298,30 +298,40 @@ inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiR
} }
namespace internal { namespace internal {
template<typename VectorX, typename VectorY, typename OtherScalar>
void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j) template<typename Scalar, typename OtherScalar,
int SizeAtCompileTime, int MinAlignment, bool Vectorizable>
struct apply_rotation_in_the_plane_selector
{ {
typedef typename VectorX::Scalar Scalar; static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
enum { PacketSize = packet_traits<Scalar>::size }; {
for(Index i=0; i<size; ++i)
{
Scalar xi = *x;
Scalar yi = *y;
*x = c * xi + numext::conj(s) * yi;
*y = -s * xi + numext::conj(c) * yi;
x += incrx;
y += incry;
}
}
};
template<typename Scalar, typename OtherScalar,
int SizeAtCompileTime, int MinAlignment>
struct apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,true /* vectorizable */>
{
static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s)
{
enum {
PacketSize = packet_traits<Scalar>::size,
OtherPacketSize = packet_traits<OtherScalar>::size
};
typedef typename packet_traits<Scalar>::type Packet; typedef typename packet_traits<Scalar>::type Packet;
eigen_assert(xpr_x.size() == xpr_y.size()); typedef typename packet_traits<OtherScalar>::type OtherPacket;
Index size = xpr_x.size();
Index incrx = xpr_x.derived().innerStride();
Index incry = xpr_y.derived().innerStride();
Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0);
Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0);
OtherScalar c = j.c();
OtherScalar s = j.s();
if (c==OtherScalar(1) && s==OtherScalar(0))
return;
/*** dynamic-size vectorized paths ***/ /*** dynamic-size vectorized paths ***/
if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1))
if(VectorX::SizeAtCompileTime == Dynamic &&
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
((incrx==1 && incry==1) || PacketSize == 1))
{ {
// both vectors are sequentially stored in memory => vectorization // both vectors are sequentially stored in memory => vectorization
enum { Peeling = 2 }; enum { Peeling = 2 };
@ -329,9 +339,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
Index alignedStart = internal::first_default_aligned(y, size); Index alignedStart = internal::first_default_aligned(y, size);
Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
const Packet pc = pset1<Packet>(c); const OtherPacket pc = pset1<OtherPacket>(c);
const Packet ps = pset1<Packet>(s); const OtherPacket ps = pset1<OtherPacket>(s);
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj;
conj_helper<OtherPacket,Packet,false,false> pm;
for(Index i=0; i<alignedStart; ++i) for(Index i=0; i<alignedStart; ++i)
{ {
@ -350,8 +361,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
{ {
Packet xi = pload<Packet>(px); Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py); Packet yi = pload<Packet>(py);
pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
px += PacketSize; px += PacketSize;
py += PacketSize; py += PacketSize;
} }
@ -365,10 +376,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
Packet xi1 = ploadu<Packet>(px+PacketSize); Packet xi1 = ploadu<Packet>(px+PacketSize);
Packet yi = pload <Packet>(py); Packet yi = pload <Packet>(py);
Packet yi1 = pload <Packet>(py+PacketSize); Packet yi1 = pload <Packet>(py+PacketSize);
pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1))); pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1)));
pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1))); pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1)));
px += Peeling*PacketSize; px += Peeling*PacketSize;
py += Peeling*PacketSize; py += Peeling*PacketSize;
} }
@ -376,8 +387,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
{ {
Packet xi = ploadu<Packet>(x+peelingEnd); Packet xi = ploadu<Packet>(x+peelingEnd);
Packet yi = pload <Packet>(y+peelingEnd); Packet yi = pload <Packet>(y+peelingEnd);
pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi))); pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi))); pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
} }
} }
@ -391,21 +402,20 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
} }
/*** fixed-size vectorized path ***/ /*** fixed-size vectorized path ***/
else if(VectorX::SizeAtCompileTime != Dynamic && else if(SizeAtCompileTime != Dynamic && MinAlignment>0) // FIXME should be compared to the required alignment
(VectorX::Flags & VectorY::Flags & PacketAccessBit) &&
(EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment
{ {
const Packet pc = pset1<Packet>(c); const OtherPacket pc = pset1<OtherPacket>(c);
const Packet ps = pset1<Packet>(s); const OtherPacket ps = pset1<OtherPacket>(s);
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj;
conj_helper<OtherPacket,Packet,false,false> pm;
Scalar* EIGEN_RESTRICT px = x; Scalar* EIGEN_RESTRICT px = x;
Scalar* EIGEN_RESTRICT py = y; Scalar* EIGEN_RESTRICT py = y;
for(Index i=0; i<size; i+=PacketSize) for(Index i=0; i<size; i+=PacketSize)
{ {
Packet xi = pload<Packet>(px); Packet xi = pload<Packet>(px);
Packet yi = pload<Packet>(py); Packet yi = pload<Packet>(py);
pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi)));
pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi)));
px += PacketSize; px += PacketSize;
py += PacketSize; py += PacketSize;
} }
@ -414,16 +424,36 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x
/*** non-vectorized path ***/ /*** non-vectorized path ***/
else else
{ {
for(Index i=0; i<size; ++i) apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,false>::run(x,incrx,y,incry,size,c,s);
}
}
};
template<typename VectorX, typename VectorY, typename OtherScalar>
void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j)
{ {
Scalar xi = *x; typedef typename VectorX::Scalar Scalar;
Scalar yi = *y; const bool Vectorizable = (VectorX::Flags & VectorY::Flags & PacketAccessBit)
*x = c * xi + numext::conj(s) * yi; && (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size));
*y = -s * xi + numext::conj(c) * yi;
x += incrx; eigen_assert(xpr_x.size() == xpr_y.size());
y += incry; Index size = xpr_x.size();
} Index incrx = xpr_x.derived().innerStride();
} Index incry = xpr_y.derived().innerStride();
Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0);
Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0);
OtherScalar c = j.c();
OtherScalar s = j.s();
if (c==OtherScalar(1) && s==OtherScalar(0))
return;
apply_rotation_in_the_plane_selector<
Scalar,OtherScalar,
VectorX::SizeAtCompileTime,
EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment),
Vectorizable>::run(x,incrx,y,incry,size,c,s);
} }
} // end namespace internal } // end namespace internal

View file

@ -404,7 +404,7 @@ inline void MatrixBase<Derived>::computeInverseWithCheck(
const RealScalar& absDeterminantThreshold const RealScalar& absDeterminantThreshold
) const ) const
{ {
RealScalar determinant; Scalar determinant;
// i'd love to put some static assertions there, but SFINAE means that they have no effect... // i'd love to put some static assertions there, but SFINAE means that they have no effect...
eigen_assert(rows() == cols()); eigen_assert(rows() == cols());
computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold); computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold);

View file

@ -1004,7 +1004,7 @@ static IndexType find_ordering /* return the number of garbage collections */
COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ; COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ;
/* get pivot column from head of minimum degree list */ /* get pivot column from head of minimum degree list */
while (head [min_score] == COLAMD_EMPTY && min_score < n_col) while (min_score < n_col && head [min_score] == COLAMD_EMPTY)
{ {
min_score++ ; min_score++ ;
} }

View file

@ -64,28 +64,28 @@ namespace internal
typedef typename _MatrixType::StorageIndex StorageIndex; typedef typename _MatrixType::StorageIndex StorageIndex;
}; };
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm) inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm)
{ {
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;} if (nbrhs == 0) {x = NULL; nbrhs=1;}
s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
} }
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm) inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm)
{ {
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;} if (nbrhs == 0) {x = NULL; nbrhs=1;}
d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm);
} }
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm) inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm)
{ {
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;} if (nbrhs == 0) {x = NULL; nbrhs=1;}
c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<PASTIX_COMPLEX*>(vals), perm, invp, reinterpret_cast<PASTIX_COMPLEX*>(x), nbrhs, iparm, dparm); c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<PASTIX_COMPLEX*>(vals), perm, invp, reinterpret_cast<PASTIX_COMPLEX*>(x), nbrhs, iparm, dparm);
} }
void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm) inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm)
{ {
if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; }
if (nbrhs == 0) {x = NULL; nbrhs=1;} if (nbrhs == 0) {x = NULL; nbrhs=1;}

View file

@ -506,8 +506,8 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k); m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k);
} }
RealScalar threshold_helper = numext::abs2<Scalar>(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows); RealScalar threshold_helper = numext::abs2<RealScalar>(m_colNormsUpdated.maxCoeff() * NumTraits<RealScalar>::epsilon()) / RealScalar(rows);
RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon()); RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<RealScalar>::epsilon());
m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
m_maxpivot = RealScalar(0); m_maxpivot = RealScalar(0);
@ -553,11 +553,11 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
// http://www.netlib.org/lapack/lawnspdf/lawn176.pdf // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf
// and used in LAPACK routines xGEQPF and xGEQP3. // and used in LAPACK routines xGEQPF and xGEQP3.
// See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html
if (m_colNormsUpdated.coeffRef(j) != 0) { if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) {
RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j); RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j);
temp = (RealScalar(1) + temp) * (RealScalar(1) - temp); temp = (RealScalar(1) + temp) * (RealScalar(1) - temp);
temp = temp < 0 ? 0 : temp; temp = temp < RealScalar(0) ? RealScalar(0) : temp;
RealScalar temp2 = temp * numext::abs2<Scalar>(m_colNormsUpdated.coeffRef(j) / RealScalar temp2 = temp * numext::abs2<RealScalar>(m_colNormsUpdated.coeffRef(j) /
m_colNormsDirect.coeffRef(j)); m_colNormsDirect.coeffRef(j));
if (temp2 <= norm_downdate_threshold) { if (temp2 <= norm_downdate_threshold) {
// The updated norm has become too inaccurate so re-compute the column // The updated norm has become too inaccurate so re-compute the column

View file

@ -11,7 +11,7 @@
// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr> // Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr> // Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
// Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk> // Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
// Copyright (C) 2014-2016 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2014-2017 Gael Guennebaud <gael.guennebaud@inria.fr>
// //
// Source Code Form is subject to the terms of the Mozilla // Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
@ -77,6 +77,7 @@ public:
typedef _MatrixType MatrixType; typedef _MatrixType MatrixType;
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
typedef typename NumTraits<RealScalar>::Literal Literal;
enum { enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime, RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime,
@ -259,7 +260,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign
//**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
RealScalar scale = matrix.cwiseAbs().maxCoeff(); RealScalar scale = matrix.cwiseAbs().maxCoeff();
if(scale==RealScalar(0)) scale = RealScalar(1); if(scale==Literal(0)) scale = Literal(1);
MatrixX copy; MatrixX copy;
if (m_isTranspose) copy = matrix.adjoint()/scale; if (m_isTranspose) copy = matrix.adjoint()/scale;
else copy = matrix/scale; else copy = matrix/scale;
@ -351,13 +352,13 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co
Index k1=0, k2=0; Index k1=0, k2=0;
for(Index j=0; j<n; ++j) for(Index j=0; j<n; ++j)
{ {
if( (A.col(j).head(n1).array()!=0).any() ) if( (A.col(j).head(n1).array()!=Literal(0)).any() )
{ {
A1.col(k1) = A.col(j).head(n1); A1.col(k1) = A.col(j).head(n1);
B1.row(k1) = B.row(j); B1.row(k1) = B.row(j);
++k1; ++k1;
} }
if( (A.col(j).tail(n2).array()!=0).any() ) if( (A.col(j).tail(n2).array()!=Literal(0)).any() )
{ {
A2.col(k2) = A.col(j).tail(n2); A2.col(k2) = A.col(j).tail(n2);
B2.row(k2) = B.row(j); B2.row(k2) = B.row(j);
@ -449,11 +450,11 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW,
l = m_naiveU.row(1).segment(firstCol, k); l = m_naiveU.row(1).segment(firstCol, k);
f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1); f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);
} }
if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1; if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1);
if (r0<considerZero) if (r0<considerZero)
{ {
c0 = 1; c0 = Literal(1);
s0 = 0; s0 = Literal(0);
} }
else else
{ {
@ -574,7 +575,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n); ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);
m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal(); m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal();
ArrayRef diag = m_workspace.head(n); ArrayRef diag = m_workspace.head(n);
diag(0) = 0; diag(0) = Literal(0);
// Allocate space for singular values and vectors // Allocate space for singular values and vectors
singVals.resize(n); singVals.resize(n);
@ -590,7 +591,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec
// but others are interleaved and we must ignore them at this stage. // but others are interleaved and we must ignore them at this stage.
// To this end, let's compute a permutation skipping them: // To this end, let's compute a permutation skipping them:
Index actual_n = n; Index actual_n = n;
while(actual_n>1 && diag(actual_n-1)==0) --actual_n; while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n;
Index m = 0; // size of the deflated problem Index m = 0; // size of the deflated problem
for(Index k=0;k<actual_n;++k) for(Index k=0;k<actual_n;++k)
if(abs(col0(k))>considerZero) if(abs(col0(k))>considerZero)
@ -691,11 +692,13 @@ template <typename MatrixType>
typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift) typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift)
{ {
Index m = perm.size(); Index m = perm.size();
RealScalar res = 1; RealScalar res = Literal(1);
for(Index i=0; i<m; ++i) for(Index i=0; i<m; ++i)
{ {
Index j = perm(i); Index j = perm(i);
res += numext::abs2(col0(j)) / ((diagShifted(j) - mu) * (diag(j) + shift + mu)); // The following expression could be rewritten to involve only a single division,
// but this would make the expression more sensitive to overflow.
res += (col0(j) / (diagShifted(j) - mu)) * (col0(j) / (diag(j) + shift + mu));
} }
return res; return res;
@ -707,19 +710,22 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
{ {
using std::abs; using std::abs;
using std::swap; using std::swap;
using std::sqrt;
Index n = col0.size(); Index n = col0.size();
Index actual_n = n; Index actual_n = n;
while(actual_n>1 && col0(actual_n-1)==0) --actual_n; // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above
// because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value.
while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n;
for (Index k = 0; k < n; ++k) for (Index k = 0; k < n; ++k)
{ {
if (col0(k) == 0 || actual_n==1) if (col0(k) == Literal(0) || actual_n==1)
{ {
// if col0(k) == 0, then entry is deflated, so singular value is on diagonal // if col0(k) == 0, then entry is deflated, so singular value is on diagonal
// if actual_n==1, then the deflated problem is already diagonalized // if actual_n==1, then the deflated problem is already diagonalized
singVals(k) = k==0 ? col0(0) : diag(k); singVals(k) = k==0 ? col0(0) : diag(k);
mus(k) = 0; mus(k) = Literal(0);
shifts(k) = k==0 ? col0(0) : diag(k); shifts(k) = k==0 ? col0(0) : diag(k);
continue; continue;
} }
@ -731,15 +737,17 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
right = (diag(actual_n-1) + col0.matrix().norm()); right = (diag(actual_n-1) + col0.matrix().norm());
else else
{ {
// Skip deflated singular values // Skip deflated singular values,
// recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside.
// This should be equivalent to using perm[]
Index l = k+1; Index l = k+1;
while(col0(l)==0) { ++l; eigen_internal_assert(l<actual_n); } while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); }
right = diag(l); right = diag(l);
} }
// first decide whether it's closer to the left end or the right end // first decide whether it's closer to the left end or the right end
RealScalar mid = left + (right-left) / 2; RealScalar mid = left + (right-left) / Literal(2);
RealScalar fMid = secularEq(mid, col0, diag, perm, diag, 0); RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0));
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
std::cout << right-left << "\n"; std::cout << right-left << "\n";
std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n"; std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n";
@ -755,7 +763,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
<< " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0) << " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0)
<< " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n"; << " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n";
#endif #endif
RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right; RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right;
// measure everything relative to shift // measure everything relative to shift
Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n); Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);
@ -785,13 +793,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
// rational interpolation: fit a function of the form a / mu + b through the two previous // rational interpolation: fit a function of the form a / mu + b through the two previous
// iterates and use its zero to compute the next iterate // iterates and use its zero to compute the next iterate
bool useBisection = fPrev*fCur>0; bool useBisection = fPrev*fCur>Literal(0);
while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
{ {
++m_numIters; ++m_numIters;
// Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples. // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples.
RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev); RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev);
RealScalar b = fCur - a / muCur; RealScalar b = fCur - a / muCur;
// And find mu such that f(mu)==0: // And find mu such that f(mu)==0:
RealScalar muZero = -a/b; RealScalar muZero = -a/b;
@ -803,8 +811,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
fCur = fZero; fCur = fZero;
if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true; if (shift == left && (muCur < Literal(0) || muCur > right - left)) useBisection = true;
if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true; if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true;
if (abs(fCur)>abs(fPrev)) useBisection = true; if (abs(fCur)>abs(fPrev)) useBisection = true;
} }
@ -817,14 +825,22 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
RealScalar leftShifted, rightShifted; RealScalar leftShifted, rightShifted;
if (shift == left) if (shift == left)
{ {
leftShifted = (std::numeric_limits<RealScalar>::min)(); // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)),
// the factor 2 is to be more conservative
leftShifted = numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
// check that we did it right:
eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) );
// I don't understand why the case k==0 would be special there: // I don't understand why the case k==0 would be special there:
// if (k == 0) rightShifted = right - left; else // if (k == 0) rightShifted = right - left; else
rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.6)); // theoretically we can take 0.5, but let's be safe rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe
} }
else else
{ {
leftShifted = -(right - left) * RealScalar(0.6); leftShifted = -(right - left) * RealScalar(0.51);
if(k+1<n)
rightShifted = -numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
else
rightShifted = -(std::numeric_limits<RealScalar>::min)(); rightShifted = -(std::numeric_limits<RealScalar>::min)();
} }
@ -841,13 +857,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n";
} }
#endif #endif
eigen_internal_assert(fLeft * fRight < 0); eigen_internal_assert(fLeft * fRight < Literal(0));
while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted))) while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
{ {
RealScalar midShifted = (leftShifted + rightShifted) / 2; RealScalar midShifted = (leftShifted + rightShifted) / Literal(2);
fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift); fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
if (fLeft * fMid < 0) if (fLeft * fMid < Literal(0))
{ {
rightShifted = midShifted; rightShifted = midShifted;
} }
@ -858,7 +874,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d
} }
} }
muCur = (leftShifted + rightShifted) / 2; muCur = (leftShifted + rightShifted) / Literal(2);
} }
singVals[k] = shift + muCur; singVals[k] = shift + muCur;
@ -892,8 +908,8 @@ void BDCSVD<MatrixType>::perturbCol0
// The offset permits to skip deflated entries while computing zhat // The offset permits to skip deflated entries while computing zhat
for (Index k = 0; k < n; ++k) for (Index k = 0; k < n; ++k)
{ {
if (col0(k) == 0) // deflated if (col0(k) == Literal(0)) // deflated
zhat(k) = 0; zhat(k) = Literal(0);
else else
{ {
// see equation (3.6) // see equation (3.6)
@ -918,7 +934,7 @@ void BDCSVD<MatrixType>::perturbCol0
std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n"; std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n";
#endif #endif
RealScalar tmp = sqrt(prod); RealScalar tmp = sqrt(prod);
zhat(k) = col0(k) > 0 ? tmp : -tmp; zhat(k) = col0(k) > Literal(0) ? tmp : -tmp;
} }
} }
} }
@ -934,7 +950,7 @@ void BDCSVD<MatrixType>::computeSingVecs
for (Index k = 0; k < n; ++k) for (Index k = 0; k < n; ++k)
{ {
if (zhat(k) == 0) if (zhat(k) == Literal(0))
{ {
U.col(k) = VectorType::Unit(n+1, k); U.col(k) = VectorType::Unit(n+1, k);
if (m_compV) V.col(k) = VectorType::Unit(n, k); if (m_compV) V.col(k) = VectorType::Unit(n, k);
@ -947,7 +963,7 @@ void BDCSVD<MatrixType>::computeSingVecs
Index i = perm(l); Index i = perm(l);
U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
} }
U(n,k) = 0; U(n,k) = Literal(0);
U.col(k).normalize(); U.col(k).normalize();
if (m_compV) if (m_compV)
@ -958,7 +974,7 @@ void BDCSVD<MatrixType>::computeSingVecs
Index i = perm(l); Index i = perm(l);
V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k]));
} }
V(0,k) = -1; V(0,k) = Literal(-1);
V.col(k).normalize(); V.col(k).normalize();
} }
} }
@ -979,15 +995,15 @@ void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index
Index start = firstCol + shift; Index start = firstCol + shift;
RealScalar c = m_computed(start, start); RealScalar c = m_computed(start, start);
RealScalar s = m_computed(start+i, start); RealScalar s = m_computed(start+i, start);
RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); RealScalar r = numext::hypot(c,s);
if (r == 0) if (r == Literal(0))
{ {
m_computed(start+i, start+i) = 0; m_computed(start+i, start+i) = Literal(0);
return; return;
} }
m_computed(start,start) = r; m_computed(start,start) = r;
m_computed(start+i, start) = 0; m_computed(start+i, start) = Literal(0);
m_computed(start+i, start+i) = 0; m_computed(start+i, start+i) = Literal(0);
JacobiRotation<RealScalar> J(c/r,-s/r); JacobiRotation<RealScalar> J(c/r,-s/r);
if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J); if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J);
@ -1020,7 +1036,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi
<< m_computed(firstColm + i+1, firstColm+i+1) << " " << m_computed(firstColm + i+1, firstColm+i+1) << " "
<< m_computed(firstColm + i+2, firstColm+i+2) << "\n"; << m_computed(firstColm + i+2, firstColm+i+2) << "\n";
#endif #endif
if (r==0) if (r==Literal(0))
{ {
m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
return; return;
@ -1029,7 +1045,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi
s/=r; s/=r;
m_computed(firstColm + i, firstColm) = r; m_computed(firstColm + i, firstColm) = r;
m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i);
m_computed(firstColm + j, firstColm) = 0; m_computed(firstColm + j, firstColm) = Literal(0);
JacobiRotation<RealScalar> J(c,-s); JacobiRotation<RealScalar> J(c,-s);
if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J); if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J);
@ -1053,7 +1069,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)(); const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag); RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag);
RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag); RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);
#ifdef EIGEN_BDCSVD_SANITY_CHECKS #ifdef EIGEN_BDCSVD_SANITY_CHECKS
assert(m_naiveU.allFinite()); assert(m_naiveU.allFinite());
@ -1081,7 +1097,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n"; std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n";
#endif #endif
col0(i) = 0; col0(i) = Literal(0);
} }
//condition 4.3 //condition 4.3

View file

@ -61,9 +61,10 @@ JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPiv
u = (LAPACKE_TYPE*)m_matrixU.data(); \ u = (LAPACKE_TYPE*)m_matrixU.data(); \
} else { ldu=1; u=&dummy; }\ } else { ldu=1; u=&dummy; }\
MatrixType localV; \ MatrixType localV; \
ldvt = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \ lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \
if (computeV()) { \ if (computeV()) { \
localV.resize(ldvt, m_cols); \ localV.resize(vt_rows, m_cols); \
ldvt = internal::convert_index<lapack_int>(localV.outerStride()); \
vt = (LAPACKE_TYPE*)localV.data(); \ vt = (LAPACKE_TYPE*)localV.data(); \
} else { ldvt=1; vt=&dummy; }\ } else { ldvt=1; vt=&dummy; }\
Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \ Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \

View file

@ -159,6 +159,8 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
traits<MatrixType>::Flags & RowMajorBit> > Y) traits<MatrixType>::Flags & RowMajorBit> > Y)
{ {
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef typename NumTraits<RealScalar>::Literal Literal;
enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit }; enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit };
typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride; typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride;
typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride; typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride;
@ -263,7 +265,7 @@ void upperbidiagonalization_blocked_helper(MatrixType& A,
SubMatType A10( A.block(bs,0, brows-bs,bs) ); SubMatType A10( A.block(bs,0, brows-bs,bs) );
SubMatType A01( A.block(0,bs, bs,bcols-bs) ); SubMatType A01( A.block(0,bs, bs,bcols-bs) );
Scalar tmp = A01(bs-1,0); Scalar tmp = A01(bs-1,0);
A01(bs-1,0) = 1; A01(bs-1,0) = Literal(1);
A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint(); A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint();
A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01; A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01;
A01(bs-1,0) = tmp; A01(bs-1,0) = tmp;

View file

@ -94,7 +94,7 @@ class AmbiVector
Index allocSize = m_allocatedElements * sizeof(ListEl); Index allocSize = m_allocatedElements * sizeof(ListEl);
allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar); allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar);
Scalar* newBuffer = new Scalar[allocSize]; Scalar* newBuffer = new Scalar[allocSize];
memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl)); std::memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl));
delete[] m_buffer; delete[] m_buffer;
m_buffer = newBuffer; m_buffer = newBuffer;
} }

View file

@ -17,7 +17,9 @@ namespace internal {
template<typename Lhs, typename Rhs, typename ResultType> template<typename Lhs, typename Rhs, typename ResultType>
static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false) static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false)
{ {
typedef typename remove_all<Lhs>::type::Scalar Scalar; typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
typedef typename remove_all<ResultType>::type::Scalar ResScalar;
// make sure to call innerSize/outerSize since we fake the storage order. // make sure to call innerSize/outerSize since we fake the storage order.
Index rows = lhs.innerSize(); Index rows = lhs.innerSize();
@ -25,7 +27,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
eigen_assert(lhs.outerSize() == rhs.innerSize()); eigen_assert(lhs.outerSize() == rhs.innerSize());
ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0); ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, values, rows, 0); ei_declare_aligned_stack_constructed_variable(ResScalar, values, rows, 0);
ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0); ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0);
std::memset(mask,0,sizeof(bool)*rows); std::memset(mask,0,sizeof(bool)*rows);
@ -51,12 +53,12 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
Index nnz = 0; Index nnz = 0;
for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{ {
Scalar y = rhsIt.value(); RhsScalar y = rhsIt.value();
Index k = rhsIt.index(); Index k = rhsIt.index();
for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
{ {
Index i = lhsIt.index(); Index i = lhsIt.index();
Scalar x = lhsIt.value(); LhsScalar x = lhsIt.value();
if(!mask[i]) if(!mask[i])
{ {
mask[i] = true; mask[i] = true;
@ -166,10 +168,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
{ {
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix; typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRhs;
RowMajorMatrix rhsRow = rhs; typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
RowMajorMatrix resRow(lhs.rows(), rhs.cols()); RowMajorRhs rhsRow = rhs;
internal::conservative_sparse_sparse_product_impl<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow); RowMajorRes resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<RowMajorRhs,Lhs,RowMajorRes>(rhsRow, lhs, resRow);
res = resRow; res = resRow;
} }
}; };
@ -179,10 +182,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
{ {
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix; typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorLhs;
RowMajorMatrix lhsRow = lhs; typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes;
RowMajorMatrix resRow(lhs.rows(), rhs.cols()); RowMajorLhs lhsRow = lhs;
internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow); RowMajorRes resRow(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorLhs,RowMajorRes>(rhs, lhsRow, resRow);
res = resRow; res = resRow;
} }
}; };
@ -219,10 +223,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
{ {
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix; typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
ColMajorMatrix lhsCol = lhs; typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
ColMajorMatrix resCol(lhs.rows(), rhs.cols()); ColMajorLhs lhsCol = lhs;
internal::conservative_sparse_sparse_product_impl<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol); ColMajorRes resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<ColMajorLhs,Rhs,ColMajorRes>(lhsCol, rhs, resCol);
res = resCol; res = resCol;
} }
}; };
@ -232,10 +237,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
{ {
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix; typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
ColMajorMatrix rhsCol = rhs; typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes;
ColMajorMatrix resCol(lhs.rows(), rhs.cols()); ColMajorRhs rhsCol = rhs;
internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol); ColMajorRes resCol(lhs.rows(), rhs.cols());
internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorRhs,ColMajorRes>(lhs, rhsCol, resCol);
res = resCol; res = resCol;
} }
}; };
@ -263,7 +269,8 @@ namespace internal {
template<typename Lhs, typename Rhs, typename ResultType> template<typename Lhs, typename Rhs, typename ResultType>
static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef typename remove_all<Lhs>::type::Scalar Scalar; typedef typename remove_all<Lhs>::type::Scalar LhsScalar;
typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
Index cols = rhs.outerSize(); Index cols = rhs.outerSize();
eigen_assert(lhs.outerSize() == rhs.innerSize()); eigen_assert(lhs.outerSize() == rhs.innerSize());
@ -274,12 +281,12 @@ static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs,
{ {
for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt)
{ {
Scalar y = rhsIt.value(); RhsScalar y = rhsIt.value();
Index k = rhsIt.index(); Index k = rhsIt.index();
for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt)
{ {
Index i = lhsIt.index(); Index i = lhsIt.index();
Scalar x = lhsIt.value(); LhsScalar x = lhsIt.value();
res.coeffRef(i,j) += x * y; res.coeffRef(i,j) += x * y;
} }
} }
@ -310,9 +317,9 @@ struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMa
{ {
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix; typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs;
ColMajorMatrix lhsCol(lhs); ColMajorLhs lhsCol(lhs);
internal::sparse_sparse_to_dense_product_impl<ColMajorMatrix,Rhs,ResultType>(lhsCol, rhs, res); internal::sparse_sparse_to_dense_product_impl<ColMajorLhs,Rhs,ResultType>(lhsCol, rhs, res);
} }
}; };
@ -321,9 +328,9 @@ struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMa
{ {
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix; typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs;
ColMajorMatrix rhsCol(rhs); ColMajorRhs rhsCol(rhs);
internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorMatrix,ResultType>(lhs, rhsCol, res); internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorRhs,ResultType>(lhs, rhsCol, res);
} }
}; };

View file

@ -47,6 +47,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
enum { enum {
Mode = _Mode, Mode = _Mode,
TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0),
RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime, RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime,
ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime
}; };
@ -310,7 +311,7 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons
while (i && i.index()<j) ++i; while (i && i.index()<j) ++i;
if(i && i.index()==j) if(i && i.index()==j)
{ {
res(j,k) += alpha * i.value() * rhs(j,k); res.coeffRef(j,k) += alpha * i.value() * rhs.coeff(j,k);
++i; ++i;
} }
} }
@ -323,14 +324,14 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons
{ {
LhsScalar lhs_ij = i.value(); LhsScalar lhs_ij = i.value();
if(!LhsIsRowMajor) lhs_ij = numext::conj(lhs_ij); if(!LhsIsRowMajor) lhs_ij = numext::conj(lhs_ij);
res_j += lhs_ij * rhs(i.index(),k); res_j += lhs_ij * rhs.coeff(i.index(),k);
res(i.index(),k) += numext::conj(lhs_ij) * rhs_j; res(i.index(),k) += numext::conj(lhs_ij) * rhs_j;
} }
res(j,k) += alpha * res_j; res.coeffRef(j,k) += alpha * res_j;
// handle diagonal coeff // handle diagonal coeff
if (ProcessFirstHalf && i && (i.index()==j)) if (ProcessFirstHalf && i && (i.index()==j))
res(j,k) += alpha * i.value() * rhs(j,k); res.coeffRef(j,k) += alpha * i.value() * rhs.coeff(j,k);
} }
} }
} }
@ -368,7 +369,7 @@ struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, Pr
// transpose everything // transpose everything
Transpose<Dest> dstT(dst); Transpose<Dest> dstT(dst);
internal::sparse_selfadjoint_time_dense_product<RhsView::Mode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); internal::sparse_selfadjoint_time_dense_product<RhsView::TransposeMode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha);
} }
}; };

View file

@ -21,7 +21,8 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
{ {
// return sparse_sparse_product_with_pruning_impl2(lhs,rhs,res); // return sparse_sparse_product_with_pruning_impl2(lhs,rhs,res);
typedef typename remove_all<Lhs>::type::Scalar Scalar; typedef typename remove_all<Rhs>::type::Scalar RhsScalar;
typedef typename remove_all<ResultType>::type::Scalar ResScalar;
typedef typename remove_all<Lhs>::type::StorageIndex StorageIndex; typedef typename remove_all<Lhs>::type::StorageIndex StorageIndex;
// make sure to call innerSize/outerSize since we fake the storage order. // make sure to call innerSize/outerSize since we fake the storage order.
@ -31,7 +32,7 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
eigen_assert(lhs.outerSize() == rhs.innerSize()); eigen_assert(lhs.outerSize() == rhs.innerSize());
// allocate a temporary buffer // allocate a temporary buffer
AmbiVector<Scalar,StorageIndex> tempVector(rows); AmbiVector<ResScalar,StorageIndex> tempVector(rows);
// mimics a resizeByInnerOuter: // mimics a resizeByInnerOuter:
if(ResultType::IsRowMajor) if(ResultType::IsRowMajor)
@ -63,14 +64,14 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
{ {
// FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index()) // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
tempVector.restart(); tempVector.restart();
Scalar x = rhsIt.value(); RhsScalar x = rhsIt.value();
for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt) for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt)
{ {
tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x; tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x;
} }
} }
res.startVec(j); res.startVec(j);
for (typename AmbiVector<Scalar,StorageIndex>::Iterator it(tempVector,tolerance); it; ++it) for (typename AmbiVector<ResScalar,StorageIndex>::Iterator it(tempVector,tolerance); it; ++it)
res.insertBackByOuterInner(j,it.index()) = it.value(); res.insertBackByOuterInner(j,it.index()) = it.value();
} }
res.finalize(); res.finalize();
@ -85,7 +86,6 @@ struct sparse_sparse_product_with_pruning_selector;
template<typename Lhs, typename Rhs, typename ResultType> template<typename Lhs, typename Rhs, typename ResultType>
struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor> struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
{ {
typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar;
typedef typename ResultType::RealScalar RealScalar; typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
@ -129,8 +129,8 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,R
typedef typename ResultType::RealScalar RealScalar; typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs; typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs; typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;
ColMajorMatrixLhs colLhs(lhs); ColMajorMatrixLhs colLhs(lhs);
ColMajorMatrixRhs colRhs(rhs); ColMajorMatrixRhs colRhs(rhs);
internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,ColMajorMatrixRhs,ResultType>(colLhs, colRhs, res, tolerance); internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,ColMajorMatrixRhs,ResultType>(colLhs, colRhs, res, tolerance);
@ -149,7 +149,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,R
typedef typename ResultType::RealScalar RealScalar; typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{ {
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixLhs; typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixLhs;
RowMajorMatrixLhs rowLhs(lhs); RowMajorMatrixLhs rowLhs(lhs);
sparse_sparse_product_with_pruning_selector<RowMajorMatrixLhs,Rhs,ResultType,RowMajor,RowMajor>(rowLhs,rhs,res,tolerance); sparse_sparse_product_with_pruning_selector<RowMajorMatrixLhs,Rhs,ResultType,RowMajor,RowMajor>(rowLhs,rhs,res,tolerance);
} }
@ -161,7 +161,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,C
typedef typename ResultType::RealScalar RealScalar; typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{ {
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixRhs; typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixRhs;
RowMajorMatrixRhs rowRhs(rhs); RowMajorMatrixRhs rowRhs(rhs);
sparse_sparse_product_with_pruning_selector<Lhs,RowMajorMatrixRhs,ResultType,RowMajor,RowMajor,RowMajor>(lhs,rowRhs,res,tolerance); sparse_sparse_product_with_pruning_selector<Lhs,RowMajorMatrixRhs,ResultType,RowMajor,RowMajor,RowMajor>(lhs,rowRhs,res,tolerance);
} }
@ -173,7 +173,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,R
typedef typename ResultType::RealScalar RealScalar; typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs; typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs;
ColMajorMatrixRhs colRhs(rhs); ColMajorMatrixRhs colRhs(rhs);
internal::sparse_sparse_product_with_pruning_impl<Lhs,ColMajorMatrixRhs,ResultType>(lhs, colRhs, res, tolerance); internal::sparse_sparse_product_with_pruning_impl<Lhs,ColMajorMatrixRhs,ResultType>(lhs, colRhs, res, tolerance);
} }
@ -185,7 +185,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,C
typedef typename ResultType::RealScalar RealScalar; typedef typename ResultType::RealScalar RealScalar;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
{ {
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs; typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs;
ColMajorMatrixLhs colLhs(lhs); ColMajorMatrixLhs colLhs(lhs);
internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,Rhs,ResultType>(colLhs, rhs, res, tolerance); internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,Rhs,ResultType>(colLhs, rhs, res, tolerance);
} }

View file

@ -52,7 +52,7 @@ namespace internal {
* rank-revealing permutations. Use colsPermutation() to get it. * rank-revealing permutations. Use colsPermutation() to get it.
* *
* Q is the orthogonal matrix represented as products of Householder reflectors. * Q is the orthogonal matrix represented as products of Householder reflectors.
* Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose. * Use matrixQ() to get an expression and matrixQ().adjoint() to get the adjoint.
* You can then apply it to a vector. * You can then apply it to a vector.
* *
* R is the sparse triangular or trapezoidal matrix. The later occurs when A is rank-deficient. * R is the sparse triangular or trapezoidal matrix. The later occurs when A is rank-deficient.
@ -65,6 +65,7 @@ namespace internal {
* \implsparsesolverconcept * \implsparsesolverconcept
* *
* \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()). * \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()).
* \warning For complex matrices matrixQ().transpose() will actually return the adjoint matrix.
* *
*/ */
template<typename _MatrixType, typename _OrderingType> template<typename _MatrixType, typename _OrderingType>
@ -196,9 +197,9 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
Index rank = this->rank(); Index rank = this->rank();
// Compute Q^T * b; // Compute Q^* * b;
typename Dest::PlainObject y, b; typename Dest::PlainObject y, b;
y = this->matrixQ().transpose() * B; y = this->matrixQ().adjoint() * B;
b = y; b = y;
// Solve with the triangular matrix R // Solve with the triangular matrix R
@ -604,7 +605,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
// Get the references // Get the references
SparseQR_QProduct(const SparseQRType& qr, const Derived& other, bool transpose) : SparseQR_QProduct(const SparseQRType& qr, const Derived& other, bool transpose) :
m_qr(qr),m_other(other),m_transpose(transpose) {} m_qr(qr),m_other(other),m_transpose(transpose) {}
inline Index rows() const { return m_transpose ? m_qr.rows() : m_qr.cols(); } inline Index rows() const { return m_qr.matrixQ().rows(); }
inline Index cols() const { return m_other.cols(); } inline Index cols() const { return m_other.cols(); }
// Assign to a vector // Assign to a vector
@ -632,7 +633,10 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
} }
else else
{ {
eigen_assert(m_qr.m_Q.rows() == m_other.rows() && "Non conforming object sizes"); eigen_assert(m_qr.matrixQ().cols() == m_other.rows() && "Non conforming object sizes");
res.conservativeResize(rows(), cols());
// Compute res = Q * other column by column // Compute res = Q * other column by column
for(Index j = 0; j < res.cols(); j++) for(Index j = 0; j < res.cols(); j++)
{ {
@ -641,7 +645,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
Scalar tau = Scalar(0); Scalar tau = Scalar(0);
tau = m_qr.m_Q.col(k).dot(res.col(j)); tau = m_qr.m_Q.col(k).dot(res.col(j));
if(tau==Scalar(0)) continue; if(tau==Scalar(0)) continue;
tau = tau * m_qr.m_hcoeffs(k); tau = tau * numext::conj(m_qr.m_hcoeffs(k));
res.col(j) -= tau * m_qr.m_Q.col(k); res.col(j) -= tau * m_qr.m_Q.col(k);
} }
} }
@ -650,7 +654,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived
const SparseQRType& m_qr; const SparseQRType& m_qr;
const Derived& m_other; const Derived& m_other;
bool m_transpose; bool m_transpose; // TODO this actually means adjoint
}; };
template<typename SparseQRType> template<typename SparseQRType>
@ -668,13 +672,14 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
{ {
return SparseQR_QProduct<SparseQRType,Derived>(m_qr,other.derived(),false); return SparseQR_QProduct<SparseQRType,Derived>(m_qr,other.derived(),false);
} }
// To use for operations with the adjoint of Q
SparseQRMatrixQTransposeReturnType<SparseQRType> adjoint() const SparseQRMatrixQTransposeReturnType<SparseQRType> adjoint() const
{ {
return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr); return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);
} }
inline Index rows() const { return m_qr.rows(); } inline Index rows() const { return m_qr.rows(); }
inline Index cols() const { return (std::min)(m_qr.rows(),m_qr.cols()); } inline Index cols() const { return m_qr.rows(); }
// To use for operations with the transpose of Q // To use for operations with the transpose of Q FIXME this is the same as adjoint at the moment
SparseQRMatrixQTransposeReturnType<SparseQRType> transpose() const SparseQRMatrixQTransposeReturnType<SparseQRType> transpose() const
{ {
return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr); return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr);
@ -682,6 +687,7 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp
const SparseQRType& m_qr; const SparseQRType& m_qr;
}; };
// TODO this actually represents the adjoint of Q
template<typename SparseQRType> template<typename SparseQRType>
struct SparseQRMatrixQTransposeReturnType struct SparseQRMatrixQTransposeReturnType
{ {
@ -712,7 +718,7 @@ struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal:
typedef typename DstXprType::StorageIndex StorageIndex; typedef typename DstXprType::StorageIndex StorageIndex;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/) static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/)
{ {
typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows()); typename DstXprType::PlainObject idMat(src.rows(), src.cols());
idMat.setIdentity(); idMat.setIdentity();
// Sort the sparse householder reflectors if needed // Sort the sparse householder reflectors if needed
const_cast<SparseQRType *>(&src.m_qr)->_sort_matrix_Q(); const_cast<SparseQRType *>(&src.m_qr)->_sort_matrix_Q();

Some files were not shown because too many files have changed in this diff Show more