/*
    Copyright 2005-2013 Intel Corporation.  All Rights Reserved.

    This file is part of Threading Building Blocks.

    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.

    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Threading Building Blocks; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    As a special exception, you may use this file as part of a free software
    library without restriction.  Specifically, if other files instantiate
    templates or use macros or inline functions from this file, or you compile
    this file and link it with other files to produce an executable, this
    file does not by itself cause the resulting executable to be covered by
    the GNU General Public License.  This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/
#ifndef __TBB_partitioner_H
#define __TBB_partitioner_H

#ifndef __TBB_INITIAL_CHUNKS
#define __TBB_INITIAL_CHUNKS 2
#endif
#ifndef __TBB_RANGE_POOL_CAPACITY
#define __TBB_RANGE_POOL_CAPACITY 8
#endif
#ifndef __TBB_INIT_DEPTH
#define __TBB_INIT_DEPTH 5
#endif
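
// Editorial note: because the three tuning knobs above are guarded by #ifndef,
// a translation unit may override the defaults before including this header,
// e.g. by compiling with -D__TBB_RANGE_POOL_CAPACITY=16 (an illustrative value,
// not a tested recommendation).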

#include "task.h"
#include "aligned_space.h"
#include "atomic.h"

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Workaround for overzealous compiler warnings
    #pragma warning (push)
    #pragma warning (disable: 4244)
#endif

namespace tbb {

class auto_partitioner;
class simple_partitioner;
class affinity_partitioner;
namespace interface6 {
    namespace internal {
        class affinity_partition_type;
    }
}

//! @cond INTERNAL
namespace internal {
size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor();

//! Defines entry point for affinity partitioner into tbb run-time library.
class affinity_partitioner_base_v3: no_copy {
    friend class tbb::affinity_partitioner;
    friend class tbb::interface6::internal::affinity_partition_type;
    //! Array that remembers affinities of tree positions to affinity_id.
    /** NULL if my_size==0. */
    affinity_id* my_array;
    //! Number of elements in my_array.
    size_t my_size;
    //! Zeros the fields.
    affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {}
    //! Deallocates my_array.
    ~affinity_partitioner_base_v3() {resize(0);}
    //! Resize my_array.
    /** Retains values if the resulting size is the same. */
    void __TBB_EXPORTED_METHOD resize( unsigned factor );
};

//! Provides backward-compatible methods for partition objects without affinity.
class partition_type_base {
public:
    void set_affinity( task & ) {}
    void note_affinity( task::affinity_id ) {}
    task* continue_after_execute_range() {return NULL;}
    bool decide_whether_to_delay() {return false;}
    void spawn_or_delay( bool, task& b ) {
        task::spawn(b);
    }
};

template<typename Range, typename Body, typename Partitioner> class start_scan;

} // namespace internal
//! @endcond

namespace serial {
namespace interface6 {
template<typename Range, typename Body, typename Partitioner> class start_for;
}
}

namespace interface6 {
//! @cond INTERNAL
namespace internal {
using namespace tbb::internal;
template<typename Range, typename Body, typename Partitioner> class start_for;
template<typename Range, typename Body, typename Partitioner> class start_reduce;

//! Join task node that contains shared flag for stealing feedback
class flag_task: public task {
public:
    tbb::atomic<bool> my_child_stolen;
    flag_task() { my_child_stolen = false; }
    task* execute() { return NULL; }
    static void mark_task_stolen(task &t) {
        tbb::atomic<bool> &flag = static_cast<flag_task*>(t.parent())->my_child_stolen;
#if TBB_USE_THREADING_TOOLS
        // Threading tools respect lock prefix but report false-positive data-race via plain store
        flag.fetch_and_store<release>(true);
#else
        flag = true;
#endif //TBB_USE_THREADING_TOOLS
    }
    static bool is_peer_stolen(task &t) {
        return static_cast<flag_task*>(t.parent())->my_child_stolen;
    }
};

//! Task to signal the demand without carrying the work
class signal_task: public task {
public:
    task* execute() {
        if( is_stolen_task() ) {
            flag_task::mark_task_stolen(*this);
        }
        return NULL;
    }
};

//! Depth is the relative depth of recursive division inside a range pool. Relative depth allows
//! infinite absolute depth of the recursion for heavily imbalanced workloads with a range represented
//! by a number that cannot fit into a machine word.
typedef unsigned char depth_t;

//! Range pool stores ranges of type T in a circular buffer with MaxCapacity
template <typename T, depth_t MaxCapacity>
class range_vector {
    depth_t my_head;
    depth_t my_tail;
    depth_t my_size;
    depth_t my_depth[MaxCapacity]; // relative depths of stored ranges
    tbb::aligned_space<T, MaxCapacity> my_pool;

public:
    //! initialize via first range in pool
    range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
        my_depth[0] = 0;
        new( my_pool.begin() ) T(elem); //TODO: std::move?
    }
    ~range_vector() {
        while( !empty() ) pop_back();
    }
    bool empty() const { return my_size == 0; }
    depth_t size() const { return my_size; }
    //! Populates range pool via ranges up to max depth or while divisible
    //! max_depth starts from 0, e.g. value 2 makes 3 ranges in the pool up to two 1/4 pieces
    void split_to_fill(depth_t max_depth) {
        while( my_size < MaxCapacity && my_depth[my_head] < max_depth
          && my_pool.begin()[my_head].is_divisible() )
        {
            depth_t prev = my_head;
            my_head = (my_head + 1) % MaxCapacity;
            new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move?
            my_pool.begin()[prev].~T(); // instead of assignment
            new(my_pool.begin()+prev) T(my_pool.begin()[my_head], split()); // do 'inverse' split
            my_depth[my_head] = ++my_depth[prev];
            my_size++;
        }
    }
    void pop_back() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
        my_pool.begin()[my_head].~T();
        my_size--;
        my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
    }
    void pop_front() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
        my_pool.begin()[my_tail].~T();
        my_size--;
        my_tail = (my_tail + 1) % MaxCapacity;
    }
    T& back() {
        __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
        return my_pool.begin()[my_head];
    }
    T& front() {
        __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
        return my_pool.begin()[my_tail];
    }
    //! similarly to front(), returns depth of the first range in the pool
    depth_t front_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
        return my_depth[my_tail];
    }
};
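
// Illustration (editorial sketch, not part of the original header): starting from a
// single range [0,1) at relative depth 0, split_to_fill(2) performs two 'inverse'
// splits, so the pool holds, from front() to back():
//
//     [1/2,1) at depth 1,  [1/4,1/2) at depth 2,  [0,1/4) at depth 2
//
// i.e. the "3 ranges ... up to two 1/4 pieces" mentioned in the comment above.
// back() is the most finely divided piece and is executed locally, while front() is
// the largest remaining piece and the first candidate to be forked off on demand.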

//! Provides default methods for partition objects and common algorithm blocks.
template <typename Partition>
struct partition_type_base {
    // decision makers
    void set_affinity( task & ) {}
    void note_affinity( task::affinity_id ) {}
    bool check_being_stolen(task &) { return false; } // part of old should_execute_range()
    bool check_for_demand(task &) { return false; }
    bool divisions_left() { return true; } // part of old should_execute_range()
    bool should_create_trap() { return false; }
    depth_t max_depth() { return 0; }
    void align_depth(depth_t) { }
    // common function blocks
    Partition& derived() { return *static_cast<Partition*>(this); }

    template<typename StartType>
    flag_task* split_work(StartType &start) {
        flag_task* parent_ptr = start.create_continuation(); // the type here is to express expectation
        start.set_parent(parent_ptr);
        parent_ptr->set_ref_count(2);
        StartType& right_work = *new( parent_ptr->allocate_child() ) StartType(start, split());
        start.spawn(right_work);
        return parent_ptr;
    }

    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range) {
        // The algorithm in a few words ([] denotes calls to decision methods of the partitioner):
        // [If this task is stolen, adjust depth and divisions if necessary, set flag].
        // If range is divisible {
        //    Spread the work while [initial divisions left];
        //    Create trap task [if necessary];
        // }
        // If not divisible or [max depth is reached], execute, else do the range pool part
        task* parent_ptr = start.parent();
        if( range.is_divisible() ) {
            if( derived().divisions_left() )
                do parent_ptr = split_work(start); // split until divisions_left()
                while( range.is_divisible() && derived().divisions_left() );
            if( derived().should_create_trap() ) { // only for range pool
                if( parent_ptr->ref_count() > 1 ) { // create new parent if necessary
                    parent_ptr = start.create_continuation();
                    start.set_parent(parent_ptr);
                } else __TBB_ASSERT(parent_ptr->ref_count() == 1, NULL);
                parent_ptr->set_ref_count(2); // safe because parent has only one reference
                signal_task& right_signal = *new( parent_ptr->allocate_child() ) signal_task();
                start.spawn(right_signal); // pure signal is to avoid deep recursion in the end
            }
        }
        if( !range.is_divisible() || !derived().max_depth() )
            start.run_body( range ); // the simple partitioner always goes here
        else { // do range pool
            internal::range_vector<Range, Partition::range_pool_size> range_pool(range);
            do {
                range_pool.split_to_fill(derived().max_depth()); // fill range pool
                if( derived().check_for_demand( start ) ) {
                    if( range_pool.size() > 1 ) {
                        parent_ptr = start.create_continuation();
                        start.set_parent(parent_ptr);
                        parent_ptr->set_ref_count(2);
                        StartType& right_work = *new( parent_ptr->allocate_child() ) StartType(start, range_pool.front(), range_pool.front_depth());
                        start.spawn(right_work);
                        range_pool.pop_front();
                        continue;
                    }
                    if( range_pool.back().is_divisible() ) // was not enough depth to fork a task
                        continue; // note: check_for_demand() should guarantee increasing max_depth() next time
                }
                start.run_body( range_pool.back() );
                range_pool.pop_back();
            } while( !range_pool.empty() && !start.is_cancelled() );
        }
    }
};

//! Provides default methods for auto (adaptive) partition objects.
template <typename Partition>
struct auto_partition_type_base : partition_type_base<Partition> {
    size_t my_divisor;
    depth_t my_max_depth;
    auto_partition_type_base() : my_max_depth(__TBB_INIT_DEPTH) {
        my_divisor = tbb::internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4;
        __TBB_ASSERT(my_divisor, "initial value of get_initial_auto_partitioner_divisor() is not valid");
    }
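    // Editorial note, based on the accompanying run-time library rather than this
    // header: get_initial_auto_partitioner_divisor() is typically proportional to
    // the number of threads P (4*P in contemporary TBB run-times), so with the
    // default __TBB_INITIAL_CHUNKS of 2 the expression above seeds my_divisor with
    // roughly 4*P*2/4 = 2*P initial chunks of work.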
    auto_partition_type_base(auto_partition_type_base &src, split) {
        my_max_depth = src.my_max_depth;
#if __TBB_INITIAL_TASK_IMBALANCE
        if( src.my_divisor <= 1 ) my_divisor = 0;
        else my_divisor = src.my_divisor = (src.my_divisor+1u) / 2u;
#else
        my_divisor = src.my_divisor / 2u;
        src.my_divisor = src.my_divisor - my_divisor; // TODO: check the effect separately
        if(my_divisor) src.my_max_depth += static_cast<depth_t>(__TBB_Log2(src.my_divisor/my_divisor));
#endif
    }
    bool check_being_stolen( task &t ) { // part of old should_execute_range()
        if( !my_divisor ) { // if not from the top P tasks of binary tree
            my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)?
            if( t.is_stolen_task() ) {
#if TBB_USE_EXCEPTIONS
                // RTTI is available, check whether the cast is valid
                __TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0);
                // correctness of the cast relies on avoiding the root task for which:
                // - initial value of my_divisor != 0 (protected by separate assertion)
                // - is_stolen_task() always returns false for the root task.
#endif
                flag_task::mark_task_stolen(t);
                my_max_depth++;
                return true;
            }
        }
        return false;
    }
    bool divisions_left() { // part of old should_execute_range()
        if( my_divisor > 1 ) return true;
        if( my_divisor && my_max_depth > 1 ) { // can split the task and once more internally. TODO: on-stack flag instead
            // keep same fragmentation while splitting for the local task pool
            my_max_depth--;
            my_divisor = 0; // decrease max_depth once per task
            return true;
        } else return false;
    }
    bool should_create_trap() {
        return my_divisor > 0;
    }
    bool check_for_demand(task &t) {
        if( flag_task::is_peer_stolen(t) ) {
            my_max_depth++;
            return true;
        } else return false;
    }
    void align_depth(depth_t base) {
        __TBB_ASSERT(base <= my_max_depth, 0);
        my_max_depth -= base;
    }
    depth_t max_depth() { return my_max_depth; }
};

//! Provides default methods for affinity (adaptive) partition objects.
class affinity_partition_type : public auto_partition_type_base<affinity_partition_type> {
    static const unsigned factor_power = 4;
    static const unsigned factor = 1<<factor_power;
    bool my_delay;
    unsigned map_begin, map_end, map_mid;
    tbb::internal::affinity_id* my_array;
    void set_mid() {
        unsigned d = (map_end - map_begin)/2; // we could add 1 but it is rather for LIFO affinity
        if( d > factor )
            d &= 0u-factor; // round down to a multiple of factor (a power of two)
        map_mid = map_end - d;
    }
public:
    affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& ap ) {
        __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
        ap.resize(factor);
        my_array = ap.my_array;
        map_begin = 0;
        map_end = unsigned(ap.my_size);
        set_mid();
        my_delay = true;
        my_divisor /= __TBB_INITIAL_CHUNKS; // let exactly P tasks be distributed across workers
        my_max_depth = factor_power+1; // the first factor_power ranges will be spawned, and >=1 ranges should be left
        __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 );
    }
    affinity_partition_type(affinity_partition_type& p, split)
        : auto_partition_type_base<affinity_partition_type>(p, split()), my_array(p.my_array) {
        __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begin)%factor==0, NULL );
        map_end = p.map_end;
        map_begin = p.map_end = p.map_mid;
        set_mid(); p.set_mid();
        my_delay = p.my_delay;
    }
    void set_affinity( task &t ) {
        if( map_begin<map_end )
            t.set_affinity( my_array[map_begin] );
    }
    void note_affinity( task::affinity_id id ) {
        if( map_begin<map_end )
            my_array[map_begin] = id;
    }
    bool check_for_demand( task &t ) {
        if( !my_delay ) {
            if( map_mid<map_end ) {
                __TBB_ASSERT(my_max_depth>__TBB_Log2(map_end-map_mid), 0);
                return true; // do not do my_max_depth++ here, but be sure my_max_depth is big enough
            }
            if( flag_task::is_peer_stolen(t) ) {
                my_max_depth++;
                return true;
            }
        } else my_delay = false;
        return false;
    }
    bool divisions_left() { // part of old should_execute_range()
        return my_divisor > 1;
    }
    bool should_create_trap() {
        return true; // TODO: rethink for the stage after memorizing level
    }
    static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
};

class auto_partition_type: public auto_partition_type_base<auto_partition_type> {
public:
    auto_partition_type( const auto_partitioner& ) {}
    auto_partition_type( auto_partition_type& src, split )
        : auto_partition_type_base<auto_partition_type>(src, split()) {}
    static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
};

class simple_partition_type: public partition_type_base<simple_partition_type> {
public:
    simple_partition_type( const simple_partitioner& ) {}
    simple_partition_type( const simple_partition_type&, split ) {}
    //! simplified algorithm
    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range) {
        while( range.is_divisible() )
            split_work( start );
        start.run_body( range );
    }
    //static const unsigned range_pool_size = 1; - not necessary because execute() is overridden
};

//! Backward-compatible partition for auto and affinity partition objects.
class old_auto_partition_type: public tbb::internal::partition_type_base {
    size_t num_chunks;
    static const size_t VICTIM_CHUNKS = 4;
public:
    bool should_execute_range(const task &t) {
        if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() )
            num_chunks = VICTIM_CHUNKS;
        return num_chunks==1;
    }
    old_auto_partition_type( const auto_partitioner& )
      : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
    old_auto_partition_type( const affinity_partitioner& )
      : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
    old_auto_partition_type( old_auto_partition_type& pt, split ) {
        num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
    }
};

} // namespace interface6::internal
//! @endcond
} // namespace interface6

//! A simple partitioner
/** Divides the range recursively until it is no longer divisible.
    @ingroup algorithms */
class simple_partitioner {
public:
    simple_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
    // backward compatibility
    class partition_type: public internal::partition_type_base {
    public:
        bool should_execute_range(const task& ) {return false;}
        partition_type( const simple_partitioner& ) {}
        partition_type( const partition_type&, split ) {}
    };
    // new implementation just extends the existing interface
    typedef interface6::internal::simple_partition_type task_partition_type;
};
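
// Usage sketch (editorial, not part of the original header; MyBody is a hypothetical
// user functor, and "tbb/parallel_for.h" plus "tbb/blocked_range.h" are assumed to be
// included): a simple_partitioner recurses down to the range's grainsize, so the
// grainsize is the only tuning knob:
//
//     tbb::parallel_for( tbb::blocked_range<size_t>(0, n, /*grainsize=*/1000),
//                        MyBody(), tbb::simple_partitioner() );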

//! An auto partitioner
/** The range is initially divided into several large chunks.
    Chunks are further subdivided into smaller pieces if demand is detected and they are divisible.
    @ingroup algorithms */
class auto_partitioner {
public:
    auto_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
    // backward compatibility
    typedef interface6::internal::old_auto_partition_type partition_type;
    // new implementation just extends the existing interface
    typedef interface6::internal::auto_partition_type task_partition_type;
};
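
// Usage sketch (editorial; MyBody is a hypothetical user functor): auto_partitioner
// is the default partitioner for the parallel algorithms, so the two calls below are
// equivalent; both let the library pick chunk sizes adaptively:
//
//     tbb::parallel_for( tbb::blocked_range<size_t>(0, n), MyBody() );
//     tbb::parallel_for( tbb::blocked_range<size_t>(0, n), MyBody(),
//                        tbb::auto_partitioner() );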

//! An affinity partitioner
class affinity_partitioner: internal::affinity_partitioner_base_v3 {
public:
    affinity_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
    // backward compatibility - for parallel_scan only
    typedef interface6::internal::old_auto_partition_type partition_type;
    // new implementation just extends the existing interface
    typedef interface6::internal::affinity_partition_type task_partition_type;
};
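
// Usage sketch (editorial; UpdateBody is a hypothetical user functor): unlike the
// other partitioners, an affinity_partitioner carries state (the affinity map held in
// affinity_partitioner_base_v3), so it must be passed by reference and kept alive
// across invocations; each pass over the same data can then replay the task-to-thread
// mapping recorded on the previous pass for cache reuse:
//
//     tbb::affinity_partitioner ap;
//     for( int step=0; step<num_steps; ++step )
//         tbb::parallel_for( tbb::blocked_range<size_t>(0, n), UpdateBody(), ap );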

} // namespace tbb

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    #pragma warning (pop)
#endif // warning 4244 is back

#undef __TBB_INITIAL_CHUNKS
#undef __TBB_RANGE_POOL_CAPACITY
#undef __TBB_INIT_DEPTH
#endif /* __TBB_partitioner_H */