// /home/users/khuck/src/hpx-lsu/src/runtime/threads/detail/thread_pool.cpp


//  Copyright (c) 2007-2015 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <hpx/runtime/threads/detail/thread_pool.hpp>

#include <hpx/error_code.hpp>
#include <hpx/exception.hpp>
#include <hpx/state.hpp>
#include <hpx/throw_exception.hpp>
#include <hpx/lcos/local/no_mutex.hpp>
#include <hpx/runtime/get_worker_thread_num.hpp>
#include <hpx/runtime/threads/detail/create_thread.hpp>
#include <hpx/runtime/threads/detail/create_work.hpp>
#include <hpx/runtime/threads/detail/scheduling_loop.hpp>
#include <hpx/runtime/threads/detail/set_thread_state.hpp>
#include <hpx/runtime/threads/detail/thread_num_tss.hpp>
#include <hpx/runtime/threads/policies/callback_notifier.hpp>
#include <hpx/runtime/threads/topology.hpp>
#include <hpx/util/assert.hpp>
#include <hpx/util/bind.hpp>
#include <hpx/util/logging.hpp>
#include <hpx/util/hardware/timestamp.hpp>
#include <hpx/util/high_resolution_clock.hpp>
#include <hpx/util/unlock_guard.hpp>

#include <boost/atomic.hpp>
#include <boost/exception_ptr.hpp>
#include <boost/system/system_error.hpp>
#include <boost/thread/barrier.hpp>
#include <boost/thread/mutex.hpp>
#include <boost/thread/thread.hpp>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <functional>
#include <iomanip>
#include <mutex>
#include <numeric>

namespace hpx { namespace threads { namespace detail
{
    ///////////////////////////////////////////////////////////////////////////
    // Forward the thread number \a num to the pool's thread_num_tss_ helper.
    // run() calls this for the launching thread and init_tss_helper calls it
    // for every worker thread.
    template <typename Scheduler>
    void thread_pool<Scheduler>::init_tss(std::size_t num)
    {
        this->thread_num_tss_.init_tss(num);
    }

    // Counterpart of init_tss(): forwards to thread_num_tss_.deinit_tss().
    template <typename Scheduler>
    void thread_pool<Scheduler>::deinit_tss()
    {
        this->thread_num_tss_.deinit_tss();
    }

    ///////////////////////////////////////////////////////////////////////////
    // Construct a pool that refers to the scheduler \a sched and the callback
    // notifier \a notifier. No OS threads are created here: the active thread
    // counter starts at zero and the set of used processing units is empty.
    //
    // \param sched      scheduler instance driven by this pool
    // \param notifier   receives the start/stop/error callbacks
    // \param pool_name  name used as a prefix in the pool's log messages
    // \param m          scheduler mode flags consulted by the worker threads
    template <typename Scheduler>
    thread_pool<Scheduler>::thread_pool(Scheduler& sched,
            threads::policies::callback_notifier& notifier,
            char const* pool_name, policies::scheduler_mode m)
      : sched_(sched)
      , notifier_(notifier)
      , pool_name_(pool_name)
      , thread_count_(0)
      , used_processing_units_()
      , mode_(m)
    {
        // neutral scale factor; run() may replace it with a measured value
        timestamp_scale_ = 1.0;
    }

    // Tear down the pool. If OS threads still exist and the scheduler has not
    // reached state_suspended yet, the pool is stopped first; afterwards the
    // thread handles are discarded.
    template <typename Scheduler>
    thread_pool<Scheduler>::~thread_pool()
    {
        // nothing to do if no OS threads were ever created (or all are gone)
        if (threads_.empty())
            return;

        bool const still_running = !sched_.has_reached_state(state_suspended);
        if (still_running)
        {
            // stop_locked() expects a lock object; no real mutex is held
            // here, so a no-op mutex is used to satisfy the interface
            lcos::local::no_mutex dummy_mtx;
            std::unique_lock<lcos::local::no_mutex> dummy_lock(dummy_mtx);
            stop_locked(dummy_lock);
        }

        threads_.clear();
    }

    ///////////////////////////////////////////////////////////////////////////
    // Return the state of the scheduler as seen by the calling thread.
    //
    // If the caller has a worker thread number and at least one pool thread
    // is active, the state of the corresponding worker is returned. Otherwise
    // the second element of the scheduler's min/max state pair is returned.
    template <typename Scheduler>
    hpx::state thread_pool<Scheduler>::get_state() const
    {
        // The sentinel check has to happen before the modulo operation below:
        // reducing std::size_t(-1) modulo the thread count yields a valid
        // looking index, which would hide the "no worker thread number" case.
        std::size_t const worker = get_worker_thread_num();
        if (worker != std::size_t(-1))
        {
            // Read the atomic counter exactly once so that it cannot drop to
            // zero between the check and the division.
            std::size_t const active =
                static_cast<std::size_t>(thread_count_.load());
            if (active != 0)
            {
                // get_worker_thread_num returns the global thread number
                // which might be too large. This function might get called
                // from within background_work inside the os executors
                return get_state(worker % active);
            }
        }
        return sched_.get_minmax_state().second;
    }

    // Return the current state stored by the scheduler for the worker thread
    // \a num_thread. The value std::size_t(-1) is not a valid thread number.
    template <typename Scheduler>
    hpx::state thread_pool<Scheduler>::get_state(std::size_t num_thread) const
    {
        HPX_ASSERT(std::size_t(-1) != num_thread);

        hpx::state const current = sched_.get_state(num_thread).load();
        return current;
    }

    // Ask the scheduler whether the state \a s has been reached.
    template <typename Scheduler>
    bool thread_pool<Scheduler>::has_reached_state(hpx::state s) const
    {
        bool const reached = sched_.has_reached_state(s);
        return reached;
    }

    ///////////////////////////////////////////////////////////////////////////
    // Initialize the scheduler with the affinity data \a data and collect the
    // processing-unit masks of the first \a num_threads worker threads in
    // used_processing_units_.
    //
    // \returns the value reported by the scheduler's init() function
    template <typename Scheduler>
    std::size_t thread_pool<Scheduler>::init(std::size_t num_threads,
        policies::init_affinity_data const& data)
    {
        topology const& topo = get_topology();
        std::size_t const cores_used = sched_.Scheduler::init(data, topo);

        // size the mask for all processing units, then merge in the mask of
        // every worker thread
        resize(used_processing_units_, threads::hardware_concurrency());

        std::size_t thread_idx = 0;
        while (thread_idx != num_threads)
        {
            used_processing_units_ |=
                sched_.Scheduler::get_pu_mask(topo, thread_idx);
            ++thread_idx;
        }

        return cores_used;
    }

    ///////////////////////////////////////////////////////////////////////////
    // Return the processing-unit number the scheduler reports for the worker
    // thread \a num_thread.
    template <typename Scheduler>
    std::size_t thread_pool<Scheduler>::get_pu_num(std::size_t num_thread) const
    {
        std::size_t const pu_num = sched_.Scheduler::get_pu_num(num_thread);
        return pu_num;
    }

    // Return the processing-unit mask the scheduler reports for the worker
    // thread \a num_thread, given the machine \a topology.
    template <typename Scheduler>
    mask_cref_type thread_pool<Scheduler>::get_pu_mask(
        topology const& topology, std::size_t num_thread) const
    {
        mask_cref_type pu_mask =
            sched_.Scheduler::get_pu_mask(topology, num_thread);
        return pu_mask;
    }

    // Return the combined processing-unit mask that init() accumulated for
    // the pool's worker threads.
    template <typename Scheduler>
    mask_cref_type thread_pool<Scheduler>::get_used_processing_units() const
    {
        return this->used_processing_units_;
    }

    // Forward to the scheduler's do_some_work() for the worker thread
    // \a num_thread.
    template <typename Scheduler>
    void thread_pool<Scheduler>::do_some_work(std::size_t num_thread)
    {
        this->sched_.Scheduler::do_some_work(num_thread);
    }

    // Report the exception \a e raised on the worker thread \a num: all
    // scheduler states are switched to state_terminating, then the notifier
    // and the scheduler are informed about the error, in that order.
    template <typename Scheduler>
    void thread_pool<Scheduler>::report_error(std::size_t num,
        boost::exception_ptr const& e)
    {
        // switch every worker to 'terminating' before anyone is notified
        this->sched_.set_all_states(state_terminating);

        this->notifier_.on_error(num, e);
        this->sched_.Scheduler::on_error(num, e);
    }

    ///////////////////////////////////////////////////////////////////////////
    // Create a new HPX thread described by \a data on this pool's scheduler.
    //
    // The request is rejected with an invalid_status error (reported through
    // \a ec) if no pool thread is active and the scheduler is not in
    // state_running. Otherwise the work is delegated to
    // detail::create_thread(), which receives \a id, \a initial_state and
    // \a run_now unchanged.
    template <typename Scheduler>
    void thread_pool<Scheduler>::create_thread(thread_init_data& data,
        thread_id_type& id, thread_state_enum initial_state, bool run_now,
        error_code& ec)
    {
        // the pool is usable if at least one OS thread is active or the
        // scheduler reports that it is running
        bool const pool_usable =
            thread_count_ != 0 || sched_.is_state(state_running);

        if (!pool_usable)
        {
            // thread-manager is not currently running
            HPX_THROWS_IF(ec, invalid_status,
                "thread_pool<Scheduler>::create_thread",
                "invalid state: thread pool is not running");
            return;
        }

        detail::create_thread(&sched_, data, id, initial_state, run_now, ec); //-V601
    }

    // Schedule the work item described by \a data on this pool's scheduler.
    //
    // The request is rejected with an invalid_status error (reported through
    // \a ec) if no pool thread is active and the scheduler is not in
    // state_running. Otherwise the work is delegated to detail::create_work().
    template <typename Scheduler>
    void thread_pool<Scheduler>::create_work(thread_init_data& data,
        thread_state_enum initial_state, error_code& ec)
    {
        // the pool is usable if at least one OS thread is active or the
        // scheduler reports that it is running
        bool const pool_usable =
            thread_count_ != 0 || sched_.is_state(state_running);

        if (!pool_usable)
        {
            // thread-manager is not currently running
            HPX_THROWS_IF(ec, invalid_status,
                "thread_pool<Scheduler>::create_work",
                "invalid state: thread pool is not running");
            return;
        }

        detail::create_work(&sched_, data, initial_state, ec); //-V601
    }

    // Change the state of the HPX thread \a id by delegating to
    // detail::set_thread_state(). The caller's worker thread number is passed
    // along with the requested state, extended state and priority.
    template <typename Scheduler>
    thread_state thread_pool<Scheduler>::set_state(
        thread_id_type const& id, thread_state_enum new_state,
        thread_state_ex_enum new_state_ex, thread_priority priority,
        error_code& ec)
    {
        std::size_t const worker_num = get_worker_thread_num();

        return detail::set_thread_state(id, new_state, //-V107
            new_state_ex, priority, worker_num, ec);
    }

    // Timed variant of set_state(): delegates to
    // detail::set_thread_state_timed() with the time point \a abs_time, the
    // pool's scheduler and the caller's worker thread number.
    template <typename Scheduler>
    thread_id_type thread_pool<Scheduler>::set_state(
        util::steady_time_point const& abs_time,
        thread_id_type const& id, thread_state_enum newstate,
        thread_state_ex_enum newstate_ex, thread_priority priority,
        error_code& ec)
    {
        std::size_t const worker_num = get_worker_thread_num();

        return detail::set_thread_state_timed(sched_, abs_time, id,
            newstate, newstate_ex, priority, worker_num, ec);
    }

    // Forward to the scheduler's abort_all_suspended_threads().
    template <typename Scheduler>
    void thread_pool<Scheduler>::abort_all_suspended_threads()
    {
        this->sched_.Scheduler::abort_all_suspended_threads();
    }

    // Forward to the scheduler's cleanup_terminated(), passing \a delete_all
    // through, and return its result unchanged.
    template <typename Scheduler>
    bool thread_pool<Scheduler>::cleanup_terminated(bool delete_all)
    {
        bool const result = sched_.Scheduler::cleanup_terminated(delete_all);
        return result;
    }

    ///////////////////////////////////////////////////////////////////////////
    // Return the worker thread number stored for the calling thread by the
    // thread_num_tss_ helper.
    template <typename Scheduler>
    std::size_t thread_pool<Scheduler>::get_worker_thread_num() const
    {
        return this->thread_num_tss_.get_worker_thread_num();
    }

    // Return the OS thread object that belongs to the worker thread
    // \a num_thread, which has to be smaller than the number of OS threads.
    template <typename Scheduler>
    boost::thread& thread_pool<Scheduler>::get_os_thread_handle(
        std::size_t num_thread)
    {
        HPX_ASSERT(num_thread < threads_.size());

        // run() creates the OS threads in descending thread-number order, so
        // the handles are stored back to front
        std::size_t const slot = threads_.size() - num_thread - 1;
        return threads_[slot];
    }

    // Forward to the scheduler's get_thread_count() with the given \a state,
    // \a priority, thread number \a num and \a reset flag, and return its
    // result.
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_thread_count(
        thread_state_enum state, thread_priority priority,
        std::size_t num, bool reset) const
    {
        std::int64_t const count =
            sched_.Scheduler::get_thread_count(state, priority, num, reset);
        return count;
    }

    // Forward to the scheduler's enumerate_threads(), handing it the callback
    // \a f and the thread state \a state, and return its result unchanged.
    template <typename Scheduler>
    bool thread_pool<Scheduler>::enumerate_threads(
        util::function_nonser<bool(thread_id_type)> const& f,
        thread_state_enum state) const
    {
        bool const result = sched_.Scheduler::enumerate_threads(f, state);
        return result;
    }

    // Forward to the scheduler's reset_thread_distribution().
    template <typename Scheduler>
    void thread_pool<Scheduler>::reset_thread_distribution()
    {
        this->sched_.Scheduler::reset_thread_distribution();
    }

    // Forward the scheduler mode \a mode to the scheduler.
    template <typename Scheduler>
    void thread_pool<Scheduler>::set_scheduler_mode(
        threads::policies::scheduler_mode mode)
    {
        this->sched_.set_scheduler_mode(mode);
    }

    ///////////////////////////////////////////////////////////////////////////
    // Start the pool: size the per-thread counter arrays, measure the scale
    // factor for hardware timestamps, create \a num_threads OS worker threads
    // and wait on the startup barrier until all of them have initialized.
    //
    // \param l            lock that has to be held by the caller (asserted);
    //                     it is handed to stop() if starting the threads fails
    // \param num_threads  number of OS threads to create; zero is rejected
    //                     through HPX_THROW_EXCEPTION(bad_parameter, ...)
    //
    // \returns true if the pool is running on return (this includes the case
    //          where it was running already), false if a std::exception was
    //          caught while the threads were being started.
    template <typename Scheduler>
    bool thread_pool<Scheduler>::run(std::unique_lock<boost::mutex>& l,
        std::size_t num_threads)
    {
        HPX_ASSERT(l.owns_lock());

        LTM_(info) //-V128
            << "thread_pool::run: " << pool_name_
            << " number of processing units available: " //-V128
            << threads::hardware_concurrency();
        LTM_(info) //-V128
            << "thread_pool::run: " << pool_name_
            << " creating " << num_threads << " OS thread(s)"; //-V128

        if (0 == num_threads) {
            HPX_THROW_EXCEPTION(bad_parameter,
                "thread_pool::run", "number of threads is zero");
        }

        if (!threads_.empty() || sched_.has_reached_state(state_running))
            return true;    // do nothing if already running

        // one slot per OS thread; thread_func() hands the slot of its thread
        // number to the scheduling loop
        executed_threads_.resize(num_threads);
        executed_thread_phases_.resize(num_threads);

        tfunc_times_.resize(num_threads);
        exec_times_.resize(num_threads);

        reset_tfunc_times_.resize(num_threads);

        // scale timestamps to nanoseconds
        std::uint64_t base_timestamp = util::hardware::timestamp();
        std::uint64_t base_time = util::high_resolution_clock::now();
        std::uint64_t curr_timestamp = util::hardware::timestamp();
        std::uint64_t curr_time = util::high_resolution_clock::now();

        // keep sampling both clocks until the high resolution clock has
        // advanced by more than 100000 units, so that the ratio computed
        // below is taken over a measurable interval
        while ((curr_time - base_time) <= 100000)
        {
            curr_timestamp = util::hardware::timestamp();
            curr_time = util::high_resolution_clock::now();
        }

        // avoid a division by zero if the hardware timestamp did not advance;
        // timestamp_scale_ keeps its previous value in that case
        if (curr_timestamp - base_timestamp != 0)
        {
            timestamp_scale_ = double(curr_time - base_time) /
                double(curr_timestamp - base_timestamp);
        }

#if defined(HPX_HAVE_THREAD_CUMULATIVE_COUNTS)
        // timestamps/values of last reset operation for various performance
        // counters
        reset_executed_threads_.resize(num_threads);
        reset_executed_thread_phases_.resize(num_threads);

#if defined(HPX_HAVE_THREAD_IDLE_RATES)
        // timestamps/values of last reset operation for various performance
        // counters
        reset_thread_duration_.resize(num_threads);
        reset_thread_duration_times_.resize(num_threads);

        reset_thread_overhead_.resize(num_threads);
        reset_thread_overhead_times_.resize(num_threads);
        reset_thread_overhead_times_total_.resize(num_threads);

        reset_thread_phase_duration_.resize(num_threads);
        reset_thread_phase_duration_times_.resize(num_threads);

        reset_thread_phase_overhead_.resize(num_threads);
        reset_thread_phase_overhead_times_.resize(num_threads);
        reset_thread_phase_overhead_times_total_.resize(num_threads);

        reset_cumulative_thread_duration_.resize(num_threads);

        reset_cumulative_thread_overhead_.resize(num_threads);
        reset_cumulative_thread_overhead_total_.resize(num_threads);
#endif
#endif

#if defined(HPX_HAVE_THREAD_IDLE_RATES)
        reset_idle_rate_time_.resize(num_threads);
        reset_idle_rate_time_total_.resize(num_threads);

#if defined(HPX_HAVE_THREAD_CREATION_AND_CLEANUP_RATES)
        reset_creation_idle_rate_time_.resize(num_threads);
        reset_creation_idle_rate_time_total_.resize(num_threads);

        reset_cleanup_idle_rate_time_.resize(num_threads);
        reset_cleanup_idle_rate_time_total_.resize(num_threads);
#endif
#endif

        LTM_(info)
            << "thread_pool::run: " << pool_name_
            << " timestamp_scale: " << timestamp_scale_; //-V128

        try {
            HPX_ASSERT(startup_.get() == nullptr);
            // the barrier is shared by all worker threads plus the thread
            // executing this function, hence num_threads + 1 participants
            startup_.reset(
                new boost::barrier(static_cast<unsigned>(num_threads+1))
            );

            // run threads and wait for initialization to complete

            topology const& topology_ = get_topology();

            // threads are created in descending thread number order, i.e.
            // threads_[k] holds the thread with number num_threads - 1 - k
            // (get_os_thread_handle() relies on this)
            std::size_t thread_num = num_threads;
            while (thread_num-- != 0) {
                threads::mask_cref_type mask =
                    sched_.Scheduler::get_pu_mask(topology_, thread_num);

                LTM_(info) //-V128
                    << "thread_pool::run: " << pool_name_
                    << " create OS thread " << thread_num //-V128
                    << ": will run on processing units within this mask: "
#if !defined(HPX_HAVE_MORE_THAN_64_THREADS) || \
    (defined(HPX_HAVE_MAX_CPU_COUNT) && HPX_HAVE_MAX_CPU_COUNT <= 64)
                    << std::hex << "0x" << mask;
#else
                    << "0b" << mask;
#endif

                // create a new thread
                threads_.push_back(boost::thread(
                        &thread_pool::thread_func, this, thread_num,
                        std::ref(topology_), std::ref(*startup_)
                    ));

                // set the new threads affinity (on Windows systems)
                if (any(mask))
                {
                    // a failure to set the affinity is only logged, it does
                    // not abort the startup
                    error_code ec(lightweight);
                    topology_.set_thread_affinity_mask(threads_.back(), mask, ec);
                    if (ec)
                    {
                        LTM_(warning) //-V128
                            << "thread_pool::run: " << pool_name_
                            << " setting thread affinity on OS thread " //-V128
                            << thread_num << " failed with: "
                            << ec.get_message();
                    }
                }
                else
                {
                    LTM_(debug) //-V128
                        << "thread_pool::run: " << pool_name_
                        << " setting thread affinity on OS thread " //-V128
                        << thread_num << " was explicitly disabled.";
                }
            }

            // the main thread needs to have a unique thread_num
            init_tss(num_threads);
            startup_->wait();

            // The scheduler is now running.
            sched_.set_all_states(state_running);
        }
        catch (std::exception const& e) {
            LTM_(always)
                << "thread_pool::run: " << pool_name_
                << " failed with: " << e.what();

            // trigger the barrier
            // NOTE(review): this waits on the barrier up to num_threads
            // times, stopping as soon as wait() returns true; presumably this
            // stands in for the workers that were never created - confirm
            // against the boost::barrier semantics
            if (startup_.get() != nullptr)
            {
                while (num_threads-- != 0 && !startup_->wait())
                    ;
            }

            // shut down whatever was started and drop the thread handles
            stop(l);
            threads_.clear();

            return false;
        }

        LTM_(info) << "thread_pool::run: " << pool_name_ << " running";
        return true;
    }

    ///////////////////////////////////////////////////////////////////////////
    // Stop the pool. The caller has to hold the lock \a l (asserted); the
    // actual work is done by stop_locked(), which receives \a l and
    // \a blocking unchanged.
    template <typename Scheduler>
    void thread_pool<Scheduler>::stop (
        std::unique_lock<boost::mutex>& l, bool blocking)
    {
        HPX_ASSERT(l.owns_lock());

        stop_locked(l, blocking);
    }

    // Stop the pool while the caller holds the lock \a l.
    //
    // The thread number of the calling thread is deinitialized first. If OS
    // threads exist, all scheduler states are set to state_stopping and the
    // scheduler is asked to wake up its threads. If \a blocking is true, every
    // OS thread is additionally joined (with \a l released during each join)
    // and the thread handles are discarded afterwards.
    template <typename Scheduler>
    template <typename Lock>
    void thread_pool<Scheduler>::stop_locked(Lock& l, bool blocking)
    {
        LTM_(info)
            << "thread_pool::stop: " << pool_name_
            << " blocking(" << std::boolalpha << blocking << ")";

        deinit_tss();

        // without OS threads there is nothing to stop
        if (threads_.empty())
            return;

        // set state to stopping
        sched_.set_all_states(state_stopping);

        // make sure we're not waiting
        sched_.Scheduler::do_some_work(std::size_t(-1));

        // a non-blocking stop only requests the shutdown
        if (!blocking)
            return;

        for (std::size_t idx = 0; idx != threads_.size(); ++idx)
        {
            // make sure no OS thread is waiting
            LTM_(info)
                << "thread_pool::stop: " << pool_name_
                << " notify_all";

            sched_.Scheduler::do_some_work(std::size_t(-1));

            LTM_(info) //-V128
                << "thread_pool::stop: " << pool_name_
                << " join:" << idx; //-V128

            // unlock the lock while joining, it is re-acquired at the end of
            // this iteration
            util::unlock_guard<Lock> release_while_joining(l);
            threads_[idx].join();
        }

        threads_.clear();
    }

    ///////////////////////////////////////////////////////////////////////////
    // Scoped guard that counts an active OS thread: the referenced counter is
    // incremented on construction and decremented again on destruction.
    struct manage_active_thread_count
    {
        // explicit: an atomic counter should never silently convert into a
        // guard object
        explicit manage_active_thread_count(boost::atomic<long>& counter)
          : counter_(counter)
        {
            ++counter_;
        }
        ~manage_active_thread_count()
        {
            --counter_;
        }

        // not copyable: a copy would decrement the counter a second time
        // without a matching increment
        manage_active_thread_count(
            manage_active_thread_count const&) = delete;
        manage_active_thread_count& operator=(
            manage_active_thread_count const&) = delete;

        boost::atomic<long>& counter_;
    };

    // Scoped guard bracketing the lifetime of a worker thread inside a pool.
    //
    // On construction the notifier is told that the thread starts, the thread
    // number is registered through init_tss() and the scheduler's
    // on_start_thread() is invoked. The destructor undoes these steps in
    // reverse order.
    template <typename Scheduler>
    struct init_tss_helper
    {
        init_tss_helper(thread_pool<Scheduler>& pool, std::size_t thread_num)
          : pool_(pool), thread_num_(thread_num)
        {
            pool_.notifier_.on_start_thread(thread_num_);
            pool_.init_tss(thread_num_);
            pool_.sched_.Scheduler::on_start_thread(thread_num_);
        }
        ~init_tss_helper()
        {
            pool_.sched_.Scheduler::on_stop_thread(thread_num_);
            pool_.deinit_tss();
            pool_.notifier_.on_stop_thread(thread_num_);
        }

        // not copyable: a copy would run the stop callbacks a second time
        // without matching start callbacks
        init_tss_helper(init_tss_helper const&) = delete;
        init_tss_helper& operator=(init_tss_helper const&) = delete;

        thread_pool<Scheduler>& pool_;
        std::size_t thread_num_;
    };

    // Entry point of every OS worker thread created by run().
    //
    // The function applies the affinity mask (and optionally a reduced
    // priority) to the current thread, registers the thread with the pool,
    // waits on the \a startup barrier and then executes the scheduling loop
    // until it returns. Exceptions escaping the loop are logged and passed to
    // report_error(); they never leave this function.
    //
    // \param num_thread  worker thread number of this OS thread
    // \param topology    machine topology used for affinity and priority
    // \param startup     barrier shared with run() and all other workers
    template <typename Scheduler>
    void thread_pool<Scheduler>::thread_func(std::size_t num_thread,
        topology const& topology, boost::barrier& startup)
    {
        // Set the affinity for the current thread.
        threads::mask_cref_type mask =
            sched_.Scheduler::get_pu_mask(topology, num_thread);

        if (LHPX_ENABLED(debug))
            topology.write_to_log();

        // errors are collected in ec and only logged; a failure to set the
        // affinity or the priority does not stop the thread
        error_code ec(lightweight);
        if (any(mask))
        {
            topology.set_thread_affinity_mask(mask, ec);
            if (ec)
            {
                LTM_(warning) //-V128
                    << "thread_pool::thread_func: " << pool_name_
                    << " setting thread affinity on OS thread " //-V128
                    << num_thread << " failed with: " << ec.get_message();
            }
        }
        else
        {
            LTM_(debug) //-V128
                << "thread_pool::thread_func: " << pool_name_
                << " setting thread affinity on OS thread " //-V128
                << num_thread << " was explicitly disabled.";
        }

        // Setting priority of worker threads to a lower priority, this needs to
        // be done in order to give the parcel pool threads higher priority
        if ((mode_ & policies::reduce_thread_priority) &&
            any(mask & used_processing_units_))
        {
            topology.reduce_thread_priority(ec);
            if (ec)
            {
                LTM_(warning) //-V128
                    << "thread_pool::thread_func: " << pool_name_
                    << " reducing thread priority on OS thread " //-V128
                    << num_thread << " failed with: " << ec.get_message();
            }
        }

        // manage the number of this thread in its TSS
        // (the helper's destructor runs on every exit path below, including
        // the early returns in the exception handlers)
        init_tss_helper<Scheduler> tss_helper(*this, num_thread);

        // wait for all threads to start up before starting HPX work
        startup.wait();

        {
            LTM_(info) //-V128
                << "thread_pool::thread_func: " << pool_name_
                << " starting OS thread: " << num_thread; //-V128

            // The inner try block handles the known exception types; a plain
            // std::exception is repackaged and rethrown so that it ends up in
            // the outer catch (...) handler together with everything else.
            try {
                try {
                    // counts this thread in thread_count_ for as long as the
                    // scheduling loop is executing
                    manage_active_thread_count count(thread_count_);

                    // run the work queue
                    hpx::threads::coroutines::prepare_main_thread main_thread;

                    // run main Scheduler loop until terminated
                    detail::scheduling_counters counters(
                        executed_threads_[num_thread],
                        executed_thread_phases_[num_thread],
                        tfunc_times_[num_thread], exec_times_[num_thread]);

                    // the idle callback is always installed, the background
                    // callback starts out empty
                    detail::scheduling_callbacks callbacks(
                        util::bind( //-V107
                            &policies::scheduler_base::idle_callback,
                            &sched_, num_thread
                        ),
                        detail::scheduling_callbacks::callback_type());

                    // background work is performed only if the pool's mode
                    // requests it
                    if (mode_ & policies::do_background_work)
                    {
                        callbacks.background_ = util::bind( //-V107
                            &policies::scheduler_base::background_callback,
                            &sched_, num_thread);
                    }

                    sched_.set_scheduler_mode(mode_);
                    detail::scheduling_loop(num_thread, sched_, counters,
                        callbacks);

                    // the OS thread is allowed to exit only if no more HPX
                    // threads exist or if some other thread has terminated
                    HPX_ASSERT(!sched_.Scheduler::get_thread_count(
                            unknown, thread_priority_default, num_thread) ||
                        sched_.get_state(num_thread) == state_terminating);
                }
                catch (hpx::exception const& e) {
                    LFATAL_ //-V128
                        << "thread_pool::thread_func: " << pool_name_
                        << " thread_num:" << num_thread //-V128
                        << " : caught hpx::exception: "
                        << e.what() << ", aborted thread execution";

                    report_error(num_thread, boost::current_exception());
                    return;
                }
                catch (boost::system::system_error const& e) {
                    LFATAL_ //-V128
                        << "thread_pool::thread_func: " << pool_name_
                        << " thread_num:" << num_thread //-V128
                        << " : caught boost::system::system_error: "
                        << e.what() << ", aborted thread execution";

                    report_error(num_thread, boost::current_exception());
                    return;
                }
                catch (std::exception const& e) {
                    // Repackage exceptions to avoid slicing.
                    boost::throw_exception(boost::enable_error_info(
                        hpx::exception(unhandled_exception, e.what())));
                }
            }
            catch (...) {
                LFATAL_ //-V128
                    << "thread_pool::thread_func: " << pool_name_
                    << " thread_num:" << num_thread //-V128
                    << " : caught unexpected " //-V128
                       "exception, aborted thread execution";

                report_error(num_thread, boost::current_exception());
                return;
            }

            // reached only if the scheduling loop returned without throwing
            LTM_(info) //-V128
                << "thread_pool::thread_func: " << pool_name_
                << " thread_num: " << num_thread
                << " : ending OS thread, " //-V128
                   "executed " << executed_threads_[num_thread]
                << " HPX threads";
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // performance counters
#if defined(HPX_HAVE_THREAD_CUMULATIVE_COUNTS)
    // Return the number of HPX threads executed since the last reset.
    //
    // \param num    worker thread to query, or std::size_t(-1) to sum over
    //               all worker threads
    // \param reset  if true, the current counter value(s) become the new
    //               reference point for subsequent calls
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::
        get_executed_threads(std::size_t num, bool reset)
    {
        std::int64_t total = 0;
        std::int64_t baseline = 0;

        if (num == std::size_t(-1))
        {
            // aggregate over all worker threads
            total = std::accumulate(executed_threads_.begin(),
                executed_threads_.end(), std::int64_t(0));
            baseline = std::accumulate(
                reset_executed_threads_.begin(),
                reset_executed_threads_.end(), std::int64_t(0));

            if (reset)
            {
                std::copy(executed_threads_.begin(), executed_threads_.end(),
                    reset_executed_threads_.begin());
            }
        }
        else
        {
            // a single worker thread was requested
            total = executed_threads_[num];
            baseline = reset_executed_threads_[num];

            if (reset)
                reset_executed_threads_[num] = total;
        }

        HPX_ASSERT(total >= baseline);

        return total - baseline;
    }

    // Return the number of HPX thread phases executed since the last reset.
    //
    // \param num    worker thread to query, or std::size_t(-1) to sum over
    //               all worker threads
    // \param reset  if true, the current counter value(s) become the new
    //               reference point for subsequent calls
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::
        get_executed_thread_phases(std::size_t num, bool reset)
    {
        std::int64_t total = 0;
        std::int64_t baseline = 0;

        if (num == std::size_t(-1))
        {
            // aggregate over all worker threads
            total = std::accumulate(executed_thread_phases_.begin(),
                executed_thread_phases_.end(), std::int64_t(0));
            baseline = std::accumulate(
                reset_executed_thread_phases_.begin(),
                reset_executed_thread_phases_.end(), std::int64_t(0));

            if (reset)
            {
                std::copy(executed_thread_phases_.begin(),
                    executed_thread_phases_.end(),
                    reset_executed_thread_phases_.begin());
            }
        }
        else
        {
            // a single worker thread was requested
            total = executed_thread_phases_[num];
            baseline = reset_executed_thread_phases_[num];

            if (reset)
                reset_executed_thread_phases_[num] = total;
        }

        HPX_ASSERT(total >= baseline);

        return total - baseline;
    }

#if defined(HPX_HAVE_THREAD_IDLE_RATES)
    /// Average execution time per thread phase since the last reset.
    ///
    /// The result is (exec_times_ delta * timestamp_scale_) divided by the
    /// executed_thread_phases_ delta, where both deltas are taken against
    /// the baselines stored by the previous reset.
    ///
    /// \param num    index into the per-thread counters, or std::size_t(-1)
    ///               to sum over all entries
    /// \param reset  when true, the values just read become the new baselines
    ///
    /// \returns the average as described above, or 0 if no phase was counted
    ///          since the last reset
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::
        get_thread_phase_duration(std::size_t num, bool reset)
    {
        std::uint64_t exec_total = 0ul;
        std::int64_t num_phases = 0l;
        std::uint64_t reset_exec_total = 0ul;
        std::int64_t reset_num_phases = 0l;

        if (num != std::size_t(-1))
        {
            // single entry: read live counters, then their baselines
            exec_total = exec_times_[num];
            num_phases = executed_thread_phases_[num];

            reset_exec_total = reset_thread_phase_duration_times_[num];
            reset_num_phases = reset_thread_phase_duration_[num];

            if (reset)
            {
                reset_thread_phase_duration_[num] = num_phases;
                reset_thread_phase_duration_times_[num] = exec_total;
            }
        }
        else
        {
            // aggregate: sum live counters and baselines over all entries
            exec_total = std::accumulate(exec_times_.begin(),
                exec_times_.end(), std::uint64_t(0));
            num_phases = std::accumulate(executed_thread_phases_.begin(),
                executed_thread_phases_.end(), std::int64_t(0));

            reset_exec_total = std::accumulate(
                reset_thread_phase_duration_times_.begin(),
                reset_thread_phase_duration_times_.end(), std::uint64_t(0));
            reset_num_phases = std::accumulate(
                reset_thread_phase_duration_.begin(),
                reset_thread_phase_duration_.end(), std::int64_t(0));

            if (reset)
            {
                std::copy(exec_times_.begin(), exec_times_.end(),
                    reset_thread_phase_duration_times_.begin());
                std::copy(executed_thread_phases_.begin(),
                    executed_thread_phases_.end(),
                    reset_thread_phase_duration_.begin());
            }
        }

        HPX_ASSERT(exec_total >= reset_exec_total);
        HPX_ASSERT(num_phases >= reset_num_phases);

        exec_total -= reset_exec_total;
        num_phases -= reset_num_phases;

        // Without this guard the division below yields inf/NaN, and
        // converting that to an integer is undefined behavior. Returning 0
        // matches what get_thread_phase_overhead does in the same situation.
        if (num_phases == 0)        // avoid division by zero
            return 0;

        return std::uint64_t(
                (double(exec_total) * timestamp_scale_) / double(num_phases)
            );
    }

    /// Average execution time per executed thread since the last reset.
    ///
    /// The result is (exec_times_ delta * timestamp_scale_) divided by the
    /// executed_threads_ delta, where both deltas are taken against the
    /// baselines stored by the previous reset.
    ///
    /// \param num    index into the per-thread counters, or std::size_t(-1)
    ///               to sum over all entries
    /// \param reset  when true, the values just read become the new baselines
    ///
    /// \returns the average as described above, or 0 if no thread was counted
    ///          since the last reset
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::
        get_thread_duration(std::size_t num, bool reset)
    {
        std::uint64_t exec_total = 0ul;
        std::int64_t num_threads = 0l;
        std::uint64_t reset_exec_total = 0ul;
        std::int64_t reset_num_threads = 0l;

        if (num != std::size_t(-1))
        {
            // single entry: read live counters, then their baselines
            exec_total = exec_times_[num];
            num_threads = executed_threads_[num];

            reset_exec_total = reset_thread_duration_times_[num];
            reset_num_threads = reset_thread_duration_[num];

            if (reset)
            {
                reset_thread_duration_[num] = num_threads;
                reset_thread_duration_times_[num] = exec_total;
            }
        }
        else
        {
            // aggregate: sum live counters and baselines over all entries
            exec_total = std::accumulate(exec_times_.begin(),
                exec_times_.end(), std::uint64_t(0));
            num_threads = std::accumulate(executed_threads_.begin(),
                executed_threads_.end(), std::int64_t(0));

            reset_exec_total = std::accumulate(
                reset_thread_duration_times_.begin(),
                reset_thread_duration_times_.end(),
                std::uint64_t(0));
            reset_num_threads = std::accumulate(
                reset_thread_duration_.begin(),
                reset_thread_duration_.end(),
                std::int64_t(0));

            if (reset)
            {
                std::copy(exec_times_.begin(), exec_times_.end(),
                    reset_thread_duration_times_.begin());
                std::copy(executed_threads_.begin(),
                    executed_threads_.end(),
                    reset_thread_duration_.begin());
            }
        }

        HPX_ASSERT(exec_total >= reset_exec_total);
        HPX_ASSERT(num_threads >= reset_num_threads);

        exec_total -= reset_exec_total;
        num_threads -= reset_num_threads;

        // Without this guard the division below yields inf/NaN, and
        // converting that to an integer is undefined behavior. Returning 0
        // matches what get_thread_overhead does in the same situation.
        if (num_threads == 0)        // avoid division by zero
            return 0;

        return std::uint64_t(
                (double(exec_total) * timestamp_scale_) / double(num_threads)
            );
    }

    /// Average overhead per thread phase since the last reset.
    ///
    /// Overhead is the tfunc_times_ delta minus the exec_times_ delta,
    /// multiplied by timestamp_scale_ and divided by the
    /// executed_thread_phases_ delta.
    ///
    /// \param num    index into the per-thread counters, or std::size_t(-1)
    ///               to sum over all entries
    /// \param reset  when true, the values just read become the new baselines
    ///
    /// \returns the average overhead, or 0 if no phase was counted since the
    ///          last reset
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_thread_phase_overhead(
        std::size_t num, bool reset)
    {
        bool const all_threads = (num == static_cast<std::size_t>(-1));

        // live counter values
        std::uint64_t exec_total = 0;
        std::uint64_t tfunc_total = 0;
        std::int64_t num_phases = 0;

        // baselines recorded by the previous reset
        std::uint64_t reset_exec_total = 0;
        std::uint64_t reset_tfunc_total = 0;
        std::int64_t reset_num_phases = 0;

        if (all_threads)
        {
            exec_total = std::accumulate(
                exec_times_.begin(), exec_times_.end(),
                static_cast<std::uint64_t>(0));
            tfunc_total = std::accumulate(
                tfunc_times_.begin(), tfunc_times_.end(),
                static_cast<std::uint64_t>(0));
            num_phases = std::accumulate(
                executed_thread_phases_.begin(),
                executed_thread_phases_.end(),
                static_cast<std::int64_t>(0));

            reset_exec_total = std::accumulate(
                reset_thread_phase_overhead_times_.begin(),
                reset_thread_phase_overhead_times_.end(),
                static_cast<std::uint64_t>(0));
            reset_tfunc_total = std::accumulate(
                reset_thread_phase_overhead_times_total_.begin(),
                reset_thread_phase_overhead_times_total_.end(),
                static_cast<std::uint64_t>(0));
            reset_num_phases = std::accumulate(
                reset_thread_phase_overhead_.begin(),
                reset_thread_phase_overhead_.end(),
                static_cast<std::int64_t>(0));

            if (reset)
            {
                // snapshot all three live counter sets as new baselines
                std::copy(
                    exec_times_.begin(), exec_times_.end(),
                    reset_thread_phase_overhead_times_.begin());
                std::copy(
                    tfunc_times_.begin(), tfunc_times_.end(),
                    reset_thread_phase_overhead_times_total_.begin());
                std::copy(
                    executed_thread_phases_.begin(),
                    executed_thread_phases_.end(),
                    reset_thread_phase_overhead_.begin());
            }
        }
        else
        {
            exec_total = exec_times_[num];
            tfunc_total = tfunc_times_[num];
            num_phases = executed_thread_phases_[num];

            reset_exec_total = reset_thread_phase_overhead_times_[num];
            reset_tfunc_total = reset_thread_phase_overhead_times_total_[num];
            reset_num_phases = reset_thread_phase_overhead_[num];

            if (reset)
            {
                reset_thread_phase_overhead_times_[num] = exec_total;
                reset_thread_phase_overhead_times_total_[num] = tfunc_total;
                reset_thread_phase_overhead_[num] = num_phases;
            }
        }

        HPX_ASSERT(exec_total >= reset_exec_total);
        HPX_ASSERT(tfunc_total >= reset_tfunc_total);
        HPX_ASSERT(num_phases >= reset_num_phases);

        // reduce everything to "since last reset"
        exec_total -= reset_exec_total;
        tfunc_total -= reset_tfunc_total;
        num_phases -= reset_num_phases;

        if (num_phases == 0)
        {
            // nothing counted: avoid division by zero
            return 0;
        }

        HPX_ASSERT(tfunc_total >= exec_total);

        double const scaled_overhead = static_cast<double>(
            (tfunc_total - exec_total) * timestamp_scale_);
        return static_cast<std::uint64_t>(
            scaled_overhead / static_cast<double>(num_phases));
    }

    /// Average overhead per executed thread since the last reset.
    ///
    /// Overhead is the tfunc_times_ delta minus the exec_times_ delta,
    /// multiplied by timestamp_scale_ and divided by the executed_threads_
    /// delta.
    ///
    /// \param num    index into the per-thread counters, or std::size_t(-1)
    ///               to sum over all entries
    /// \param reset  when true, the values just read become the new baselines
    ///
    /// \returns the average overhead, or 0 if no thread was counted since the
    ///          last reset
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_thread_overhead(
        std::size_t num, bool reset)
    {
        bool const all_threads = (num == static_cast<std::size_t>(-1));

        // live counter values
        std::uint64_t exec_total = 0;
        std::uint64_t tfunc_total = 0;
        std::int64_t num_threads = 0;

        // baselines recorded by the previous reset
        std::uint64_t reset_exec_total = 0;
        std::uint64_t reset_tfunc_total = 0;
        std::int64_t reset_num_threads = 0;

        if (all_threads)
        {
            exec_total = std::accumulate(
                exec_times_.begin(), exec_times_.end(),
                static_cast<std::uint64_t>(0));
            tfunc_total = std::accumulate(
                tfunc_times_.begin(), tfunc_times_.end(),
                static_cast<std::uint64_t>(0));
            num_threads = std::accumulate(
                executed_threads_.begin(), executed_threads_.end(),
                static_cast<std::int64_t>(0));

            reset_exec_total = std::accumulate(
                reset_thread_overhead_times_.begin(),
                reset_thread_overhead_times_.end(),
                static_cast<std::uint64_t>(0));
            reset_tfunc_total = std::accumulate(
                reset_thread_overhead_times_total_.begin(),
                reset_thread_overhead_times_total_.end(),
                static_cast<std::uint64_t>(0));
            reset_num_threads = std::accumulate(
                reset_thread_overhead_.begin(),
                reset_thread_overhead_.end(),
                static_cast<std::int64_t>(0));

            if (reset)
            {
                // snapshot all three live counter sets as new baselines
                std::copy(
                    exec_times_.begin(), exec_times_.end(),
                    reset_thread_overhead_times_.begin());
                std::copy(
                    tfunc_times_.begin(), tfunc_times_.end(),
                    reset_thread_overhead_times_total_.begin());
                std::copy(
                    executed_threads_.begin(), executed_threads_.end(),
                    reset_thread_overhead_.begin());
            }
        }
        else
        {
            exec_total = exec_times_[num];
            tfunc_total = tfunc_times_[num];
            num_threads = executed_threads_[num];

            reset_exec_total = reset_thread_overhead_times_[num];
            reset_tfunc_total = reset_thread_overhead_times_total_[num];
            reset_num_threads = reset_thread_overhead_[num];

            if (reset)
            {
                reset_thread_overhead_times_[num] = exec_total;
                reset_thread_overhead_times_total_[num] = tfunc_total;
                reset_thread_overhead_[num] = num_threads;
            }
        }

        HPX_ASSERT(exec_total >= reset_exec_total);
        HPX_ASSERT(tfunc_total >= reset_tfunc_total);
        HPX_ASSERT(num_threads >= reset_num_threads);

        // reduce everything to "since last reset"
        exec_total -= reset_exec_total;
        tfunc_total -= reset_tfunc_total;
        num_threads -= reset_num_threads;

        if (num_threads == 0)
        {
            // nothing counted: avoid division by zero
            return 0;
        }

        HPX_ASSERT(tfunc_total >= exec_total);

        double const scaled_overhead = static_cast<double>(
            (tfunc_total - exec_total) * timestamp_scale_);
        return static_cast<std::uint64_t>(
            scaled_overhead / static_cast<double>(num_threads));
    }

    /// Accumulated exec_times_ since the last reset, multiplied by
    /// timestamp_scale_.
    ///
    /// \param num    index into the per-thread counters, or std::size_t(-1)
    ///               to sum over all entries
    /// \param reset  when true, the values just read become the new baseline
    ///               stored in reset_cumulative_thread_duration_
    ///
    /// \returns the scaled difference between live value and baseline
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_cumulative_thread_duration(
        std::size_t num, bool reset)
    {
        bool const all_threads = (num == static_cast<std::size_t>(-1));

        std::uint64_t exec_total = 0ul;
        std::uint64_t reset_exec_total = 0ul;

        if (all_threads)
        {
            exec_total = std::accumulate(
                exec_times_.begin(), exec_times_.end(),
                static_cast<std::uint64_t>(0));
            reset_exec_total = std::accumulate(
                reset_cumulative_thread_duration_.begin(),
                reset_cumulative_thread_duration_.end(),
                static_cast<std::uint64_t>(0));

            if (reset)
            {
                // snapshot every live value as its new baseline
                std::copy(
                    exec_times_.begin(), exec_times_.end(),
                    reset_cumulative_thread_duration_.begin());
            }
        }
        else
        {
            exec_total = exec_times_[num];
            reset_exec_total = reset_cumulative_thread_duration_[num];

            if (reset)
            {
                reset_cumulative_thread_duration_[num] = exec_total;
            }
        }

        HPX_ASSERT(exec_total >= reset_exec_total);

        std::uint64_t const since_reset = exec_total - reset_exec_total;
        return static_cast<std::uint64_t>(
            static_cast<double>(since_reset) * timestamp_scale_);
    }

    /// Accumulated overhead since the last reset: the tfunc_times_ delta
    /// minus the exec_times_ delta, multiplied by timestamp_scale_.
    ///
    /// \param num    index into the per-thread counters, or std::size_t(-1)
    ///               to sum over all entries
    /// \param reset  when true, the values just read become the new baselines
    ///
    /// \returns the scaled overhead as described above
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_cumulative_thread_overhead(
        std::size_t num, bool reset)
    {
        bool const all_threads = (num == static_cast<std::size_t>(-1));

        std::uint64_t exec_total = 0ul;
        std::uint64_t reset_exec_total = 0ul;
        std::uint64_t tfunc_total = 0ul;
        std::uint64_t reset_tfunc_total = 0ul;

        if (all_threads)
        {
            exec_total = std::accumulate(
                exec_times_.begin(), exec_times_.end(),
                static_cast<std::uint64_t>(0));
            reset_exec_total = std::accumulate(
                reset_cumulative_thread_overhead_.begin(),
                reset_cumulative_thread_overhead_.end(),
                static_cast<std::uint64_t>(0));

            tfunc_total = std::accumulate(
                tfunc_times_.begin(), tfunc_times_.end(),
                static_cast<std::uint64_t>(0));
            reset_tfunc_total = std::accumulate(
                reset_cumulative_thread_overhead_total_.begin(),
                reset_cumulative_thread_overhead_total_.end(),
                static_cast<std::uint64_t>(0));

            if (reset)
            {
                // snapshot both live counter sets as new baselines
                std::copy(
                    exec_times_.begin(), exec_times_.end(),
                    reset_cumulative_thread_overhead_.begin());
                std::copy(
                    tfunc_times_.begin(), tfunc_times_.end(),
                    reset_cumulative_thread_overhead_total_.begin());
            }
        }
        else
        {
            exec_total = exec_times_[num];
            tfunc_total = tfunc_times_[num];

            reset_exec_total = reset_cumulative_thread_overhead_[num];
            reset_tfunc_total = reset_cumulative_thread_overhead_total_[num];

            if (reset)
            {
                reset_cumulative_thread_overhead_[num] = exec_total;
                reset_cumulative_thread_overhead_total_[num] = tfunc_total;
            }
        }

        HPX_ASSERT(exec_total >= reset_exec_total);
        HPX_ASSERT(tfunc_total >= reset_tfunc_total);

        exec_total -= reset_exec_total;
        tfunc_total -= reset_tfunc_total;

        // the subtraction is carried out in floating point, as before
        double const overhead =
            static_cast<double>(tfunc_total) - static_cast<double>(exec_total);
        return static_cast<std::uint64_t>(overhead * timestamp_scale_);
    }
#endif
#endif

    /// Accumulated tfunc_times_ since the last reset, multiplied by
    /// timestamp_scale_.
    ///
    /// \param num    index into the per-thread counters, or std::size_t(-1)
    ///               to sum over all entries
    /// \param reset  when true, the values just read become the new baseline
    ///               stored in reset_tfunc_times_
    ///
    /// \returns the scaled difference between live value and baseline
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_cumulative_duration(
        std::size_t num, bool reset)
    {
        bool const all_threads = (num == static_cast<std::size_t>(-1));

        std::uint64_t tfunc_total = 0ul;
        std::uint64_t reset_tfunc_total = 0ul;

        if (all_threads)
        {
            tfunc_total = std::accumulate(
                tfunc_times_.begin(), tfunc_times_.end(),
                static_cast<std::uint64_t>(0));
            reset_tfunc_total = std::accumulate(
                reset_tfunc_times_.begin(), reset_tfunc_times_.end(),
                static_cast<std::uint64_t>(0));

            if (reset)
            {
                // snapshot every live value as its new baseline
                std::copy(
                    tfunc_times_.begin(), tfunc_times_.end(),
                    reset_tfunc_times_.begin());
            }
        }
        else
        {
            tfunc_total = tfunc_times_[num];
            reset_tfunc_total = reset_tfunc_times_[num];

            if (reset)
            {
                reset_tfunc_times_[num] = tfunc_total;
            }
        }

        HPX_ASSERT(tfunc_total >= reset_tfunc_total);

        std::uint64_t const since_reset = tfunc_total - reset_tfunc_total;
        return static_cast<std::uint64_t>(
            static_cast<double>(since_reset) * timestamp_scale_);
    }

#if defined(HPX_HAVE_THREAD_IDLE_RATES)
    ///////////////////////////////////////////////////////////////////////////
    /// Idle rate over all entries since the last reset, in units of
    /// 0.01 percent.
    ///
    /// Computed as 10000 * (1 - exec / tfunc), where exec and tfunc are the
    /// sums of exec_times_ and tfunc_times_ minus their stored baselines.
    ///
    /// \param reset  when true, the current exec_times_ / tfunc_times_ values
    ///               become the new baselines
    ///
    /// \returns the idle rate, or 10000 if the tfunc delta is zero
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::avg_idle_rate(bool reset)
    {
        // live totals first, then the baselines from the previous reset
        std::uint64_t exec_total = std::accumulate(
            exec_times_.begin(), exec_times_.end(),
            static_cast<std::uint64_t>(0));
        std::uint64_t tfunc_total = std::accumulate(
            tfunc_times_.begin(), tfunc_times_.end(),
            static_cast<std::uint64_t>(0));
        std::uint64_t reset_exec_total = std::accumulate(
            reset_idle_rate_time_.begin(), reset_idle_rate_time_.end(),
            static_cast<std::uint64_t>(0));
        std::uint64_t reset_tfunc_total = std::accumulate(
            reset_idle_rate_time_total_.begin(),
            reset_idle_rate_time_total_.end(),
            static_cast<std::uint64_t>(0));

        if (reset)
        {
            // snapshot both live counter sets as new baselines
            std::copy(
                exec_times_.begin(), exec_times_.end(),
                reset_idle_rate_time_.begin());
            std::copy(
                tfunc_times_.begin(), tfunc_times_.end(),
                reset_idle_rate_time_total_.begin());
        }

        HPX_ASSERT(exec_total >= reset_exec_total);
        HPX_ASSERT(tfunc_total >= reset_tfunc_total);

        exec_total -= reset_exec_total;
        tfunc_total -= reset_tfunc_total;

        if (tfunc_total == 0)
        {
            // avoid division by zero
            return 10000LL;
        }

        HPX_ASSERT(tfunc_total >= exec_total);

        double const busy_fraction =
            static_cast<double>(exec_total) / static_cast<double>(tfunc_total);
        double const idle_fraction = 1. - busy_fraction;
        return static_cast<std::int64_t>(10000. * idle_fraction);   // 0.01 percent
    }

    /// Idle rate of a single entry since the last reset, in units of
    /// 0.01 percent.
    ///
    /// Computed as 10000 * (1 - exec / tfunc), where exec and tfunc are the
    /// exec_times_ and tfunc_times_ values at \a num_thread minus their
    /// stored baselines.
    ///
    /// \param num_thread  index into the per-thread counters
    /// \param reset       when true, the values just read become the new
    ///                    baselines for this index
    ///
    /// \returns the idle rate, or 10000 if the tfunc delta is zero
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::avg_idle_rate(
        std::size_t num_thread, bool reset)
    {
        std::uint64_t exec_time = exec_times_[num_thread];
        std::uint64_t tfunc_time = tfunc_times_[num_thread];
        std::uint64_t reset_exec_time = reset_idle_rate_time_[num_thread];
        std::uint64_t reset_tfunc_time = reset_idle_rate_time_total_[num_thread];

        if (reset)
        {
            reset_idle_rate_time_[num_thread] = exec_time;
            reset_idle_rate_time_total_[num_thread] = tfunc_time;
        }

        HPX_ASSERT(exec_time >= reset_exec_time);
        HPX_ASSERT(tfunc_time >= reset_tfunc_time);

        exec_time -= reset_exec_time;
        tfunc_time -= reset_tfunc_time;

        if (tfunc_time == 0)   // avoid division by zero
            return 10000LL;

        // Equal deltas are a valid state (idle rate of exactly 0), so the
        // check is non-strict, matching the aggregate avg_idle_rate(bool).
        HPX_ASSERT(tfunc_time >= exec_time);

        double const percent = 1. - (double(exec_time) / double(tfunc_time));
        return std::int64_t(10000. * percent);   // 0.01 percent
    }

#if defined(HPX_HAVE_THREAD_CREATION_AND_CLEANUP_RATES)
    /// Ratio of the scheduler's thread creation time to the overhead time
    /// (tfunc delta minus exec delta) since the last reset, in units of
    /// 0.01 percent.
    ///
    /// \param reset  forwarded to sched_.get_creation_time(); when true, the
    ///               current exec_times_ / tfunc_times_ values also become
    ///               the new baselines for this counter
    ///
    /// \returns 10000 * creation_time / (tfunc - exec), or 10000 if the
    ///          tfunc and exec deltas are equal
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::avg_creation_idle_rate(bool reset)
    {
        double const creation_total =
            static_cast<double>(sched_.get_creation_time(reset));

        std::uint64_t exec_total = std::accumulate(exec_times_.begin(),
            exec_times_.end(), std::uint64_t(0));
        std::uint64_t tfunc_total = std::accumulate(tfunc_times_.begin(),
            tfunc_times_.end(), std::uint64_t(0));
        std::uint64_t reset_exec_total = std::accumulate(
            reset_creation_idle_rate_time_.begin(),
            reset_creation_idle_rate_time_.end(), std::uint64_t(0));
        std::uint64_t reset_tfunc_total = std::accumulate(
            reset_creation_idle_rate_time_total_.begin(),
            reset_creation_idle_rate_time_total_.end(), std::uint64_t(0));

        if (reset)
        {
            // exec baseline
            std::copy(exec_times_.begin(), exec_times_.end(),
                reset_creation_idle_rate_time_.begin());
            // tfunc baseline: must go to the *_total_ vector. Writing it to
            // reset_creation_idle_rate_time_ would clobber the exec baseline
            // and leave the tfunc baseline stale.
            std::copy(tfunc_times_.begin(), tfunc_times_.end(),
                reset_creation_idle_rate_time_total_.begin());
        }

        HPX_ASSERT(exec_total >= reset_exec_total);
        HPX_ASSERT(tfunc_total >= reset_tfunc_total);

        exec_total -= reset_exec_total;
        tfunc_total -= reset_tfunc_total;

        if (tfunc_total == exec_total)   // avoid division by zero
            return 10000LL;

        HPX_ASSERT(tfunc_total > exec_total);

        double const percent = (creation_total / double(tfunc_total - exec_total));
        return std::int64_t(10000. * percent);    // 0.01 percent
    }

    /// Ratio of the scheduler's thread cleanup time to the overhead time
    /// (tfunc delta minus exec delta) since the last reset, in units of
    /// 0.01 percent.
    ///
    /// \param reset  forwarded to sched_.get_cleanup_time(); when true, the
    ///               current exec_times_ / tfunc_times_ values also become
    ///               the new baselines for this counter
    ///
    /// \returns 10000 * cleanup_time / (tfunc - exec), or 10000 if the
    ///          tfunc and exec deltas are equal
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::avg_cleanup_idle_rate(bool reset)
    {
        double const cleanup_total =
            static_cast<double>(sched_.get_cleanup_time(reset));

        std::uint64_t exec_total = std::accumulate(exec_times_.begin(),
            exec_times_.end(), std::uint64_t(0));
        std::uint64_t tfunc_total = std::accumulate(tfunc_times_.begin(),
            tfunc_times_.end(), std::uint64_t(0));
        std::uint64_t reset_exec_total = std::accumulate(
            reset_cleanup_idle_rate_time_.begin(),
            reset_cleanup_idle_rate_time_.end(), std::uint64_t(0));
        std::uint64_t reset_tfunc_total = std::accumulate(
            reset_cleanup_idle_rate_time_total_.begin(),
            reset_cleanup_idle_rate_time_total_.end(), std::uint64_t(0));

        if (reset)
        {
            // exec baseline
            std::copy(exec_times_.begin(), exec_times_.end(),
                reset_cleanup_idle_rate_time_.begin());
            // tfunc baseline: must go to the *_total_ vector. Writing it to
            // reset_cleanup_idle_rate_time_ would clobber the exec baseline
            // and leave the tfunc baseline stale.
            std::copy(tfunc_times_.begin(), tfunc_times_.end(),
                reset_cleanup_idle_rate_time_total_.begin());
        }

        HPX_ASSERT(exec_total >= reset_exec_total);
        HPX_ASSERT(tfunc_total >= reset_tfunc_total);

        exec_total -= reset_exec_total;
        tfunc_total -= reset_tfunc_total;

        if (tfunc_total == exec_total)   // avoid division by zero
            return 10000LL;

        HPX_ASSERT(tfunc_total > exec_total);

        double const percent = (cleanup_total / double(tfunc_total - exec_total));
        return std::int64_t(10000. * percent);    // 0.01 percent
    }
#endif
#endif

    /// Forward the queue-length query for \a num_thread to the scheduler
    /// (qualified call to Scheduler::get_queue_length on sched_).
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_queue_length(
        std::size_t num_thread) const
    {
        std::int64_t const queue_length =
            sched_.Scheduler::get_queue_length(num_thread);
        return queue_length;
    }

#ifdef HPX_HAVE_THREAD_QUEUE_WAITTIME
    /// Forward the average-thread-wait-time query for \a num_thread to the
    /// scheduler (qualified call to Scheduler::get_average_thread_wait_time
    /// on sched_).
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_average_thread_wait_time(
        std::size_t num_thread) const
    {
        std::int64_t const wait_time =
            sched_.Scheduler::get_average_thread_wait_time(num_thread);
        return wait_time;
    }

    /// Forward the average-task-wait-time query for \a num_thread to the
    /// scheduler (qualified call to Scheduler::get_average_task_wait_time
    /// on sched_).
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_average_task_wait_time(
        std::size_t num_thread) const
    {
        std::int64_t const wait_time =
            sched_.Scheduler::get_average_task_wait_time(num_thread);
        return wait_time;
    }
#endif

#ifdef HPX_HAVE_THREAD_STEALING_COUNTS
    /// Forward to Scheduler::get_num_pending_misses on sched_, passing
    /// \a num and \a reset through unchanged.
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_num_pending_misses(
        std::size_t num, bool reset)
    {
        std::int64_t const count =
            sched_.Scheduler::get_num_pending_misses(num, reset);
        return count;
    }

    /// Forward to Scheduler::get_num_pending_accesses on sched_, passing
    /// \a num and \a reset through unchanged.
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_num_pending_accesses(
        std::size_t num, bool reset)
    {
        std::int64_t const count =
            sched_.Scheduler::get_num_pending_accesses(num, reset);
        return count;
    }

    /// Forward to Scheduler::get_num_stolen_from_pending on sched_, passing
    /// \a num and \a reset through unchanged.
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_num_stolen_from_pending(
        std::size_t num, bool reset)
    {
        std::int64_t const count =
            sched_.Scheduler::get_num_stolen_from_pending(num, reset);
        return count;
    }

    /// Forward to Scheduler::get_num_stolen_to_pending on sched_, passing
    /// \a num and \a reset through unchanged.
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_num_stolen_to_pending(
        std::size_t num, bool reset)
    {
        std::int64_t const count =
            sched_.Scheduler::get_num_stolen_to_pending(num, reset);
        return count;
    }

    /// Forward to Scheduler::get_num_stolen_from_staged on sched_, passing
    /// \a num and \a reset through unchanged.
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_num_stolen_from_staged(
        std::size_t num, bool reset)
    {
        std::int64_t const count =
            sched_.Scheduler::get_num_stolen_from_staged(num, reset);
        return count;
    }

    /// Forward to Scheduler::get_num_stolen_to_staged on sched_, passing
    /// \a num and \a reset through unchanged.
    template <typename Scheduler>
    std::int64_t thread_pool<Scheduler>::get_num_stolen_to_staged(
        std::size_t num, bool reset)
    {
        std::int64_t const count =
            sched_.Scheduler::get_num_stolen_to_staged(num, reset);
        return count;
    }
#endif

}}}

///////////////////////////////////////////////////////////////////////////////
/// explicit template instantiation for the thread manager of our choice
///
/// The thread_pool member templates are defined in this file, so each
/// scheduler type is instantiated explicitly here. Optional schedulers are
/// guarded by their HPX_HAVE_* configuration macro and include their own
/// header right before the instantiation.

// throttle scheduler: requires both the throttle scheduler and APEX support
#if defined(HPX_HAVE_THROTTLE_SCHEDULER) && defined(HPX_HAVE_APEX)
#include <hpx/runtime/threads/policies/throttle_queue_scheduler.hpp>
template class HPX_EXPORT hpx::threads::detail::thread_pool<
    hpx::threads::policies::throttle_queue_scheduler<> >;
#endif

// local queue scheduler
#if defined(HPX_HAVE_LOCAL_SCHEDULER)
#include <hpx/runtime/threads/policies/local_queue_scheduler.hpp>
template class HPX_EXPORT hpx::threads::detail::thread_pool<
    hpx::threads::policies::local_queue_scheduler<> >;
#endif

// static queue scheduler
#if defined(HPX_HAVE_STATIC_SCHEDULER)
#include <hpx/runtime/threads/policies/static_queue_scheduler.hpp>
template class HPX_EXPORT hpx::threads::detail::thread_pool<
    hpx::threads::policies::static_queue_scheduler<> >;
#endif

// static priority queue scheduler
#if defined(HPX_HAVE_STATIC_PRIORITY_SCHEDULER)
#include <hpx/runtime/threads/policies/static_priority_queue_scheduler.hpp>
template class HPX_EXPORT hpx::threads::detail::thread_pool<
    hpx::threads::policies::static_priority_queue_scheduler<> >;
#endif

// local priority queue scheduler: the only unconditional instantiation
#include <hpx/runtime/threads/policies/local_priority_queue_scheduler.hpp>
template class HPX_EXPORT hpx::threads::detail::thread_pool<
    hpx::threads::policies::local_priority_queue_scheduler<> >;

// ABP scheduler
// NOTE(review): no header is included specifically for this instantiation;
// presumably abp_fifo_priority_queue_scheduler is declared by a header
// already included above -- confirm.
#if defined(HPX_HAVE_ABP_SCHEDULER)
template class HPX_EXPORT hpx::threads::detail::thread_pool<
    hpx::threads::policies::abp_fifo_priority_queue_scheduler>;
#endif

// hierarchy scheduler
#if defined(HPX_HAVE_HIERARCHY_SCHEDULER)
#include <hpx/runtime/threads/policies/hierarchy_scheduler.hpp>
template class HPX_EXPORT hpx::threads::detail::thread_pool<
    hpx::threads::policies::hierarchy_scheduler<> >;
#endif

// periodic priority queue scheduler
#if defined(HPX_HAVE_PERIODIC_PRIORITY_SCHEDULER)
#include <hpx/runtime/threads/policies/periodic_priority_queue_scheduler.hpp>
template class HPX_EXPORT hpx::threads::detail::thread_pool<
    hpx::threads::policies::periodic_priority_queue_scheduler<> >;
#endif



Line	% of fetches	Source
1		// Copyright (c) 2007-2015 Hartmut Kaiser
2		//
3		// Distributed under the Boost Software License, Version 1.0. (See accompanying
4		// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6		#include <hpx/runtime/threads/detail/thread_pool.hpp>
7
8		#include <hpx/error_code.hpp>
9		#include <hpx/exception.hpp>
10		#include <hpx/state.hpp>
11		#include <hpx/throw_exception.hpp>
12		#include <hpx/lcos/local/no_mutex.hpp>
13		#include <hpx/runtime/get_worker_thread_num.hpp>
14		#include <hpx/runtime/threads/detail/create_thread.hpp>
15		#include <hpx/runtime/threads/detail/create_work.hpp>
16		#include <hpx/runtime/threads/detail/scheduling_loop.hpp>
17		#include <hpx/runtime/threads/detail/set_thread_state.hpp>
18		#include <hpx/runtime/threads/detail/thread_num_tss.hpp>
19		#include <hpx/runtime/threads/policies/callback_notifier.hpp>
20		#include <hpx/runtime/threads/topology.hpp>
21		#include <hpx/util/assert.hpp>
22		#include <hpx/util/bind.hpp>
23		#include <hpx/util/logging.hpp>
24		#include <hpx/util/hardware/timestamp.hpp>
25		#include <hpx/util/high_resolution_clock.hpp>
26		#include <hpx/util/unlock_guard.hpp>
27
28		#include <boost/atomic.hpp>
29		#include <boost/exception_ptr.hpp>
30		#include <boost/system/system_error.hpp>
31		#include <boost/thread/barrier.hpp>
32		#include <boost/thread/mutex.hpp>
33		#include <boost/thread/thread.hpp>
34
35		#include <algorithm>
36		#include <cstddef>
37		#include <cstdint>
38		#include <exception>
39		#include <functional>
40		#include <iomanip>
41		#include <mutex>
42		#include <numeric>
43
44		namespace hpx { namespace threads { namespace detail
45		{
46		///////////////////////////////////////////////////////////////////////////
47		template <typename Scheduler>
48		void thread_pool<Scheduler>::init_tss(std::size_t num)
49		{
50		thread_num_tss_.init_tss(num);
51		}
52
53		template <typename Scheduler>
54		void thread_pool<Scheduler>::deinit_tss()
55		{
56		thread_num_tss_.deinit_tss();
57		}
58
59		///////////////////////////////////////////////////////////////////////////
60		template <typename Scheduler>
61		thread_pool<Scheduler>::thread_pool(Scheduler& sched,
62		threads::policies::callback_notifier& notifier,
63		char const* pool_name, policies::scheduler_mode m)
64		: sched_(sched),
65		notifier_(notifier),
66		pool_name_(pool_name),
67		thread_count_(0),
68		used_processing_units_(),
69		mode_(m)
70		{
71		timestamp_scale_ = 1.0;
72		}
73
74		template <typename Scheduler>
75		thread_pool<Scheduler>::~thread_pool()
76		{
77		if (!threads_.empty()) {
78		if (!sched_.has_reached_state(state_suspended))
79		{
80		// still running
81		lcos::local::no_mutex mtx;
82		std::unique_lock<lcos::local::no_mutex> l(mtx);
83		stop_locked(l);
84		}
85		threads_.clear();
86		}
87		}
88
89		///////////////////////////////////////////////////////////////////////////
90		template <typename Scheduler>
91		hpx::state thread_pool<Scheduler>::get_state() const
92		{
93		// get_worker_thread_num returns the global thread number which might
94		// be too large. This function might get called from within
95		// background_work inside the os executors
96		if (thread_count_ != 0)
97		{
98		std::size_t num_thread = get_worker_thread_num() % thread_count_;
99		if (num_thread != std::size_t(-1))
100		return get_state(num_thread);
101		}
102		return sched_.get_minmax_state().second;
103		}
104
105		template <typename Scheduler>
106		hpx::state thread_pool<Scheduler>::get_state(std::size_t num_thread) const
107		{
108		HPX_ASSERT(num_thread != std::size_t(-1));
109		return sched_.get_state(num_thread).load();
110		}
111
112		template <typename Scheduler>
113		bool thread_pool<Scheduler>::has_reached_state(hpx::state s) const
114		{
115		return sched_.has_reached_state(s);
116		}
117
118		///////////////////////////////////////////////////////////////////////////
119		template <typename Scheduler>
120		std::size_t thread_pool<Scheduler>::init(std::size_t num_threads,
121		policies::init_affinity_data const& data)
122		{
123		topology const& topology_ = get_topology();
124		std::size_t cores_used = sched_.Scheduler::init(data, topology_);
125
126		resize(used_processing_units_, threads::hardware_concurrency());
127		for (std::size_t i = 0; i != num_threads; ++i)
128		used_processing_units_ \|= sched_.Scheduler::get_pu_mask(topology_, i);
129
130		return cores_used;
131		}
132
133		///////////////////////////////////////////////////////////////////////////
134		template <typename Scheduler>
135		std::size_t thread_pool<Scheduler>::get_pu_num(std::size_t num_thread) const
136		{
137		return sched_.Scheduler::get_pu_num(num_thread);
138		}
139
140		template <typename Scheduler>
141		mask_cref_type thread_pool<Scheduler>::get_pu_mask(
142		topology const& topology, std::size_t num_thread) const
143		{
144		return sched_.Scheduler::get_pu_mask(topology, num_thread);
145		}
146
147		template <typename Scheduler>
148		mask_cref_type thread_pool<Scheduler>::get_used_processing_units() const
149		{
150		return used_processing_units_;
151		}
152
153		template <typename Scheduler>
154		void thread_pool<Scheduler>::do_some_work(std::size_t num_thread)
155		{
156		sched_.Scheduler::do_some_work(num_thread);
157		}
158
159		template <typename Scheduler>
160		void thread_pool<Scheduler>::report_error(std::size_t num,
161		boost::exception_ptr const& e)
162		{
163		sched_.set_all_states(state_terminating);
164		notifier_.on_error(num, e);
165		sched_.Scheduler::on_error(num, e);
166		}
167
168		///////////////////////////////////////////////////////////////////////////
169		template <typename Scheduler>
170		void thread_pool<Scheduler>::create_thread(thread_init_data& data,
171		thread_id_type& id, thread_state_enum initial_state, bool run_now,
172		error_code& ec)
173		{
174		// verify state
175		if (thread_count_ == 0 && !sched_.is_state(state_running))
176		{
177		// thread-manager is not currently running
178		HPX_THROWS_IF(ec, invalid_status,
179		"thread_pool<Scheduler>::create_thread",
180		"invalid state: thread pool is not running");
181		return;
182		}
183
184		detail::create_thread(&sched_, data, id, initial_state, run_now, ec); //-V601
185		}
186
187		template <typename Scheduler>
188		void thread_pool<Scheduler>::create_work(thread_init_data& data,
189		thread_state_enum initial_state, error_code& ec)
190		{
191		// verify state
192		if (thread_count_ == 0 && !sched_.is_state(state_running))
193		{
194		// thread-manager is not currently running
195		HPX_THROWS_IF(ec, invalid_status,
196		"thread_pool<Scheduler>::create_work",
197		"invalid state: thread pool is not running");
198		return;
199		}
200
201		detail::create_work(&sched_, data, initial_state, ec); //-V601
202		}
203
204		template <typename Scheduler>
205		thread_state thread_pool<Scheduler>::set_state(
206		thread_id_type const& id, thread_state_enum new_state,
207		thread_state_ex_enum new_state_ex, thread_priority priority,
208		error_code& ec)
209		{
210		return detail::set_thread_state(id, new_state, //-V107
211		new_state_ex, priority, get_worker_thread_num(), ec);
212		}
213
214		template <typename Scheduler>
215		thread_id_type thread_pool<Scheduler>::set_state(
216		util::steady_time_point const& abs_time,
217		thread_id_type const& id, thread_state_enum newstate,
218		thread_state_ex_enum newstate_ex, thread_priority priority,
219		error_code& ec)
220		{
221		return detail::set_thread_state_timed(sched_, abs_time, id,
222		newstate, newstate_ex, priority, get_worker_thread_num(), ec);
223		}
224
225		template <typename Scheduler>
226		void thread_pool<Scheduler>::abort_all_suspended_threads()
227		{
228		sched_.Scheduler::abort_all_suspended_threads();
229		}
230
231		template <typename Scheduler>
232		bool thread_pool<Scheduler>::cleanup_terminated(bool delete_all)
233		{
234		return sched_.Scheduler::cleanup_terminated(delete_all);
235		}
236
237		///////////////////////////////////////////////////////////////////////////
238		template <typename Scheduler>
239		std::size_t thread_pool<Scheduler>::get_worker_thread_num() const
240		{
241		return thread_num_tss_.get_worker_thread_num();
242		}
243
244		template <typename Scheduler>
245		boost::thread& thread_pool<Scheduler>::get_os_thread_handle(
246		std::size_t num_thread)
247		{
248		HPX_ASSERT(num_thread < threads_.size());
249		return threads_[threads_.size() - num_thread - 1];
250		}
251
252		template <typename Scheduler>
253		std::int64_t thread_pool<Scheduler>::get_thread_count(
254		thread_state_enum state, thread_priority priority,
255		std::size_t num, bool reset) const
256		{
257		return sched_.Scheduler::get_thread_count(state, priority, num, reset);
258		}
259
260		template <typename Scheduler>
261		bool thread_pool<Scheduler>::enumerate_threads(
262		util::function_nonser<bool(thread_id_type)> const& f,
263		thread_state_enum state) const
264		{
265		return sched_.Scheduler::enumerate_threads(f, state);
266		}
267
268		template <typename Scheduler>
269		void thread_pool<Scheduler>::reset_thread_distribution()
270		{
271		return sched_.Scheduler::reset_thread_distribution();
272		}
273
274		template <typename Scheduler>
275		void thread_pool<Scheduler>::set_scheduler_mode(
276		threads::policies::scheduler_mode mode)
277		{
278		return sched_.set_scheduler_mode(mode);
279		}
280
281		///////////////////////////////////////////////////////////////////////////
282		template <typename Scheduler>
283		bool thread_pool<Scheduler>::run(std::unique_lock<boost::mutex>& l,
284		std::size_t num_threads)
285		{
286		HPX_ASSERT(l.owns_lock());
287
288		LTM_(info) //-V128
289		<< "thread_pool::run: " << pool_name_
290		<< " number of processing units available: " //-V128
291		<< threads::hardware_concurrency();
292		LTM_(info) //-V128
293		<< "thread_pool::run: " << pool_name_
294		<< " creating " << num_threads << " OS thread(s)"; //-V128
295
296		if (0 == num_threads) {
297		HPX_THROW_EXCEPTION(bad_parameter,
298		"thread_pool::run", "number of threads is zero");
299		}
300
301		if (!threads_.empty() \|\| sched_.has_reached_state(state_running))
302		return true; // do nothing if already running
303
304		executed_threads_.resize(num_threads);
305		executed_thread_phases_.resize(num_threads);
306
307		tfunc_times_.resize(num_threads);
308		exec_times_.resize(num_threads);
309
310		reset_tfunc_times_.resize(num_threads);
311
312		// scale timestamps to nanoseconds
313		std::uint64_t base_timestamp = util::hardware::timestamp();
314		std::uint64_t base_time = util::high_resolution_clock::now();
315		std::uint64_t curr_timestamp = util::hardware::timestamp();
316		std::uint64_t curr_time = util::high_resolution_clock::now();
317
318		while ((curr_time - base_time) <= 100000)
319		{
320		curr_timestamp = util::hardware::timestamp();
321		curr_time = util::high_resolution_clock::now();
322		}
323
324		if (curr_timestamp - base_timestamp != 0)
325		{
326		timestamp_scale_ = double(curr_time - base_time) /
327		double(curr_timestamp - base_timestamp);
328		}
329
330		#if defined(HPX_HAVE_THREAD_CUMULATIVE_COUNTS)
331		// timestamps/values of last reset operation for various performance
332		// counters
333		reset_executed_threads_.resize(num_threads);
334		reset_executed_thread_phases_.resize(num_threads);
335
336		#if defined(HPX_HAVE_THREAD_IDLE_RATES)
337		// timestamps/values of last reset operation for various performance
338		// counters
339		reset_thread_duration_.resize(num_threads);
340		reset_thread_duration_times_.resize(num_threads);
341
342		reset_thread_overhead_.resize(num_threads);
343		reset_thread_overhead_times_.resize(num_threads);
344		reset_thread_overhead_times_total_.resize(num_threads);
345
346		reset_thread_phase_duration_.resize(num_threads);
347		reset_thread_phase_duration_times_.resize(num_threads);
348
349		reset_thread_phase_overhead_.resize(num_threads);
350		reset_thread_phase_overhead_times_.resize(num_threads);
351		reset_thread_phase_overhead_times_total_.resize(num_threads);
352
353		reset_cumulative_thread_duration_.resize(num_threads);
354
355		reset_cumulative_thread_overhead_.resize(num_threads);
356		reset_cumulative_thread_overhead_total_.resize(num_threads);
357		#endif
358		#endif
359
360		#if defined(HPX_HAVE_THREAD_IDLE_RATES)
361		reset_idle_rate_time_.resize(num_threads);
362		reset_idle_rate_time_total_.resize(num_threads);
363
364		#if defined(HPX_HAVE_THREAD_CREATION_AND_CLEANUP_RATES)
365		reset_creation_idle_rate_time_.resize(num_threads);
366		reset_creation_idle_rate_time_total_.resize(num_threads);
367
368		reset_cleanup_idle_rate_time_.resize(num_threads);
369		reset_cleanup_idle_rate_time_total_.resize(num_threads);
370		#endif
371		#endif
372
373		LTM_(info)
374		<< "thread_pool::run: " << pool_name_
375		<< " timestamp_scale: " << timestamp_scale_; //-V128
376
377		try {
378		HPX_ASSERT(startup_.get() == nullptr);
379		startup_.reset(
380		new boost::barrier(static_cast<unsigned>(num_threads+1))
381		);
382
383		// run threads and wait for initialization to complete
384
385		topology const& topology_ = get_topology();
386
387		std::size_t thread_num = num_threads;
388		while (thread_num-- != 0) {
389		threads::mask_cref_type mask =
390		sched_.Scheduler::get_pu_mask(topology_, thread_num);
391
392		LTM_(info) //-V128
393		<< "thread_pool::run: " << pool_name_
394		<< " create OS thread " << thread_num //-V128
395		<< ": will run on processing units within this mask: "
396		#if !defined(HPX_HAVE_MORE_THAN_64_THREADS) \|\| \
397		(defined(HPX_HAVE_MAX_CPU_COUNT) && HPX_HAVE_MAX_CPU_COUNT <= 64)
398		<< std::hex << "0x" << mask;
399		#else
400		<< "0b" << mask;
401		#endif
402
403		// create a new thread
404		threads_.push_back(boost::thread(
405		&thread_pool::thread_func, this, thread_num,
406		std::ref(topology_), std::ref(*startup_)
407		));
408
409		// set the new threads affinity (on Windows systems)
410		if (any(mask))
411		{
412		error_code ec(lightweight);
413		topology_.set_thread_affinity_mask(threads_.back(), mask, ec);
414		if (ec)
415		{
416		LTM_(warning) //-V128
417		<< "thread_pool::run: " << pool_name_
418		<< " setting thread affinity on OS thread " //-V128
419		<< thread_num << " failed with: "
420		<< ec.get_message();
421		}
422		}
423		else
424		{
425		LTM_(debug) //-V128
426		<< "thread_pool::run: " << pool_name_
427		<< " setting thread affinity on OS thread " //-V128
428		<< thread_num << " was explicitly disabled.";
429		}
430		}
431
432		// the main thread needs to have a unique thread_num
433		init_tss(num_threads);
434		startup_->wait();
435
436		// The scheduler is now running.
437		sched_.set_all_states(state_running);
438		}
439		catch (std::exception const& e) {
440		LTM_(always)
441		<< "thread_pool::run: " << pool_name_
442		<< " failed with: " << e.what();
443
444		// trigger the barrier
445		if (startup_.get() != nullptr)
446		{
447		while (num_threads-- != 0 && !startup_->wait())
448		;
449		}
450
451		stop(l);
452		threads_.clear();
453
454		return false;
455		}
456
457		LTM_(info) << "thread_pool::run: " << pool_name_ << " running";
458		return true;
459		}
460
461		///////////////////////////////////////////////////////////////////////////
462		template <typename Scheduler>
463		void thread_pool<Scheduler>::stop (
464		std::unique_lock<boost::mutex>& l, bool blocking)
465		{
466		HPX_ASSERT(l.owns_lock());
467
468		return stop_locked(l, blocking);
469		}
470
471		template <typename Scheduler>
472		template <typename Lock>
473		void thread_pool<Scheduler>::stop_locked(Lock& l, bool blocking)
474		{
475		LTM_(info)
476		<< "thread_pool::stop: " << pool_name_
477		<< " blocking(" << std::boolalpha << blocking << ")";
478
479		deinit_tss();
480
481		if (!threads_.empty()) {
482		// set state to stopping
483		sched_.set_all_states(state_stopping);
484
485		// make sure we're not waiting
486		sched_.Scheduler::do_some_work(std::size_t(-1));
487
488		if (blocking) {
489		for (std::size_t i = 0; i != threads_.size(); ++i)
490		{
491		// make sure no OS thread is waiting
492		LTM_(info)
493		<< "thread_pool::stop: " << pool_name_
494		<< " notify_all";
495
496		sched_.Scheduler::do_some_work(std::size_t(-1));
497
498		LTM_(info) //-V128
499		<< "thread_pool::stop: " << pool_name_
500		<< " join:" << i; //-V128