Added a dependency on the Intel Threading Building Blocks (TBB) library.

Changed the C++ parallelization code to use Intel Threading Building Blocks (TBB).
2025-10-23 00:31:11 -06:00 · 2017-03-07 13:03:14 +01:00 · 2017-03-07 13:03:14 +01:00 · cb1a6eae1e
commit cb1a6eae1e
parent 8a42c0ad9f
6 changed files with 158 additions and 96 deletions
--- a/xs/src/libslic3r/GCode.cpp
+++ b/xs/src/libslic3r/GCode.cpp
@ -1029,6 +1029,7 @@ GCode::needs_retraction(const Polyline &travel, ExtrusionRole role)
    
    if (role == erSupportMaterial) {
        const SupportLayer* support_layer = dynamic_cast<const SupportLayer*>(this->layer);
+        //FIXME support_layer->support_islands.contains should use some search structure!
        if (support_layer != NULL && support_layer->support_islands.contains(travel)) {
            // skip retraction if this is a travel move inside a support material island
            return false;
--- a/xs/src/libslic3r/PrintObject.cpp
+++ b/xs/src/libslic3r/PrintObject.cpp
@ -7,6 +7,9 @@
 #include <utility>
 #include <boost/log/trivial.hpp>

+#include <tbb/parallel_for.h>
+#include <tbb/atomic.h>
+
 #include <Shiny/Shiny.h>

 #ifdef SLIC3R_DEBUG_SLICE_PROCESSING
@ -632,7 +635,7 @@ PrintObject::discover_vertical_shells()
                        LayerRegion &neighbor_region = *neighbor_layer.get_region(int(idx_region));
                        Polygons newholes;
                        for (size_t idx_region = 0; idx_region < this->_print->regions.size(); ++ idx_region)
-                            polygons_append(newholes, to_polygons(neighbor_layer.get_region(idx_region)->fill_expolygons));
+                            polygons_append(newholes, to_polygons(neighbor_layer.regions[idx_region]->fill_expolygons));
                        if (hole_first) {
                            hole_first = false;
                            polygons_append(holes, STDMOVE(newholes));
@ -1267,13 +1270,16 @@ PrintObject::_make_perimeters()
        }
    }
    
-    BOOST_LOG_TRIVIAL(debug) << "Generating perimeters in parallel";
-    parallelize<Layer*>(
-        std::queue<Layer*>(std::deque<Layer*>(this->layers.begin(), this->layers.end())),  // cast LayerPtrs to std::queue<Layer*>
-        boost::bind(&Slic3r::Layer::make_perimeters, _1),
-        this->_print->config.threads.value
+    BOOST_LOG_TRIVIAL(debug) << "Generating perimeters in parallel - start";
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(0, this->layers.size()),
+        [this](const tbb::blocked_range<size_t>& range) {
+            for (size_t layer_idx = range.begin(); layer_idx < range.end(); ++ layer_idx)
+                this->layers[layer_idx]->make_perimeters();
+        }
    );
-    
+    BOOST_LOG_TRIVIAL(debug) << "Generating perimeters in parallel - end";
+
    /*
        simplify slices (both layer and region slices),
        we only need the max resolution for perimeters
@ -1290,13 +1296,16 @@ PrintObject::_infill()
    if (this->state.is_done(posInfill)) return;
    this->state.set_started(posInfill);
    
-    BOOST_LOG_TRIVIAL(debug) << "Filling layers in parallel";
-    parallelize<Layer*>(
-        std::queue<Layer*>(std::deque<Layer*>(this->layers.begin(), this->layers.end())),  // cast LayerPtrs to std::queue<Layer*>
-        boost::bind(&Slic3r::Layer::make_fills, _1),
-        this->_print->config.threads.value
+    BOOST_LOG_TRIVIAL(debug) << "Filling layers in parallel - start";
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(0, this->layers.size()),
+        [this](const tbb::blocked_range<size_t>& range) {
+            for (size_t layer_idx = range.begin(); layer_idx < range.end(); ++ layer_idx)
+                this->layers[layer_idx]->make_fills();
+        }
    );
-    
+    BOOST_LOG_TRIVIAL(debug) << "Filling layers in parallel - end";
+
    /*  we could free memory now, but this would make this step not idempotent
    ### $_->fill_surfaces->clear for map @{$_->regions}, @{$object->layers};
    */
--- a/xs/src/libslic3r/TriangleMesh.cpp
+++ b/xs/src/libslic3r/TriangleMesh.cpp
@ -13,6 +13,8 @@

 #include <boost/log/trivial.hpp>

+#include <tbb/parallel_for.h>
+
 #if 0
    #define DEBUG
    #define _DEBUG
@ -672,10 +674,12 @@ TriangleMeshSlicer::slice(const std::vector<float> &z, std::vector<Polygons>* la
    std::vector<IntersectionLines> lines(z.size());
    {
        boost::mutex lines_mutex;
-        parallelize<int>(
-            0,
-            this->mesh->stl.stats.number_of_facets-1,
-            boost::bind(&TriangleMeshSlicer::_slice_do, this, _1, &lines, &lines_mutex, z)
+        tbb::parallel_for(
+            tbb::blocked_range<int>(0,this->mesh->stl.stats.number_of_facets),
+            [&lines, &lines_mutex, &z, this](const tbb::blocked_range<int>& range) {
+                for (int facet_idx = range.begin(); facet_idx < range.end(); ++ facet_idx)
+                    this->_slice_do(facet_idx, &lines, &lines_mutex, z);
+            }
        );
    }
    
@ -684,10 +688,12 @@ TriangleMeshSlicer::slice(const std::vector<float> &z, std::vector<Polygons>* la
    // build loops
    BOOST_LOG_TRIVIAL(trace) << "TriangleMeshSlicer::_make_loops_do";
    layers->resize(z.size());
-    parallelize<size_t>(
-        0,
-        lines.size()-1,
-        boost::bind(&TriangleMeshSlicer::_make_loops_do, this, _1, &lines, layers)
+    tbb::parallel_for(
+        tbb::blocked_range<size_t>(0, lines.size()),
+        [&lines, &layers, this](const tbb::blocked_range<size_t>& range) {
+            for (size_t line_idx = range.begin(); line_idx < range.end(); ++ line_idx)
+                this->make_loops(lines[line_idx], &(*layers)[line_idx]);
+        }
    );
    BOOST_LOG_TRIVIAL(trace) << "TriangleMeshSlicer::slice finished";
 }
@ -873,12 +879,6 @@ bool TriangleMeshSlicer::slice_facet(
    return false;
 }

-void
-TriangleMeshSlicer::_make_loops_do(size_t i, std::vector<IntersectionLines>* lines, std::vector<Polygons>* layers) const
-{
-    this->make_loops((*lines)[i], &(*layers)[i]);
-}
-
 void TriangleMeshSlicer::make_loops(std::vector<IntersectionLine> &lines, Polygons* loops) const
 {
    // Remove tangent edges.
--- a/xs/src/libslic3r/TriangleMesh.hpp
+++ b/xs/src/libslic3r/TriangleMesh.hpp
@ -132,7 +132,6 @@ private:
    std::vector<stl_vertex>  v_scaled_shared;

    void _slice_do(size_t facet_idx, std::vector<IntersectionLines>* lines, boost::mutex* lines_mutex, const std::vector<float> &z) const;
-    void _make_loops_do(size_t i, std::vector<IntersectionLines>* lines, std::vector<Polygons>* layers) const;
    void make_loops(std::vector<IntersectionLine> &lines, Polygons* loops) const;
    void make_expolygons(const Polygons &loops, ExPolygons* slices) const;
    void make_expolygons_simple(std::vector<IntersectionLine> &lines, ExPolygons* slices) const;
--- a/xs/src/libslic3r/libslic3r.h
+++ b/xs/src/libslic3r/libslic3r.h
@ -14,7 +14,7 @@
 #include <boost/thread.hpp>

 #define SLIC3R_FORK_NAME "Slic3r Prusa Edition"
-#define SLIC3R_VERSION "1.31.6"
+#define SLIC3R_VERSION "1.33.8.devel"
 #define SLIC3R_BUILD "UNKNOWN"

 //FIXME This epsilon value is used for many non-related purposes:
@ -99,53 +99,6 @@ inline void append_to(std::vector<T> &dst, const std::vector<T> &src)
    dst.insert(dst.end(), src.begin(), src.end());
 }

-template <class T> void
-_parallelize_do(std::queue<T>* queue, boost::mutex* queue_mutex, boost::function<void(T)> func)
-{
-    //std::cout << "THREAD STARTED: " << boost::this_thread::get_id() << std::endl;
-    while (true) {
-        T i;
-        {
-            boost::lock_guard<boost::mutex> l(*queue_mutex);
-            if (queue->empty()) return;
-            i = queue->front();
-            queue->pop();
-        }
-        //std::cout << "  Thread " << boost::this_thread::get_id() << " processing item " << i << std::endl;
-        func(i);
-        boost::this_thread::interruption_point();
-    }
-}
-
-template <class T> void
-parallelize(std::queue<T> queue, boost::function<void(T)> func,
-    int threads_count = boost::thread::hardware_concurrency())
-{
-#ifdef SLIC3R_PROFILE
-    while (! queue.empty()) {
-        func(queue.front());
-        queue.pop();
-    }
-#else
-    if (threads_count == 0)
-        threads_count = 2;
-    boost::mutex queue_mutex;
-    boost::thread_group workers;
-    for (int i = 0; i < std::min(threads_count, int(queue.size())); ++ i)
-        workers.add_thread(new boost::thread(&_parallelize_do<T>, &queue, &queue_mutex, func));
-    workers.join_all();
-#endif
-}
-
-template <class T> void
-parallelize(T start, T end, boost::function<void(T)> func,
-    int threads_count = boost::thread::hardware_concurrency())
-{
-    std::queue<T> queue;
-    for (T i = start; i <= end; ++i) queue.push(i);
-    parallelize(queue, func, threads_count);
-}
-
 template <typename T>
 void append(std::vector<T>& dest, const std::vector<T>& src)
 {