/home/users/khuck/src/hpx-lsu/hpx/runtime/threads/coroutines/detail/context_linux_x86.hpp

//  Copyright (c) 2006, Giovanni P. Deretta
//  Copyright (c) 2007 Robert Perricone
//  Copyright (c) 2007-2012 Hartmut Kaiser
//  Copyright (c) 2011 Bryce Adelstein-Lelbach
//  Copyright (c) 2013-2016 Thomas Heller
//
//  Distributed under the Boost Software License, Version 1.0.
//  (See accompanying file LICENSE_1_0.txt or copy at
//  http://www.boost.org/LICENSE_1_0.txt)

#ifndef HPX_RUNTIME_THREADS_COROUTINES_DETAIL_CONTEXT_LINUX_HPP
#define HPX_RUNTIME_THREADS_COROUTINES_DETAIL_CONTEXT_LINUX_HPP

#if defined(__linux) || defined(linux) || defined(__linux__)

#include <hpx/config.hpp>
#include <hpx/runtime/threads/coroutines/detail/get_stack_pointer.hpp>
#include <hpx/runtime/threads/coroutines/detail/posix_utility.hpp>
#include <hpx/runtime/threads/coroutines/detail/swap_context.hpp>
#include <hpx/util/assert.hpp>
#include <hpx/util/get_and_reset_value.hpp>

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <stdexcept>
#include <sys/param.h>

#include <boost/atomic.hpp>
#include <boost/format.hpp>

#if defined(HPX_HAVE_VALGRIND)
#if defined(__GNUG__) && !defined(__INTEL_COMPILER)
#if defined(HPX_GCC_DIAGNOSTIC_PRAGMA_CONTEXTS)
#pragma GCC diagnostic push
#endif
#pragma GCC diagnostic ignored "-Wpointer-arith"
#endif
#include <valgrind/valgrind.h>
#endif

/*
 * Defining HPX_COROUTINE_NO_SEPARATE_CALL_SITES disables the separate
 * invoke and yield swap_context functions. Separate call sites improve
 * performance by at least 25% on P4 for invoke+yield loops, at the cost
 * of slightly higher instruction cache usage, and are therefore enabled
 * by default.
 */
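//
// For example (an illustrative sketch, not part of this header): a build
// that prefers the smaller instruction-cache footprint can opt out before
// including this header:
//
//   #define HPX_COROUTINE_NO_SEPARATE_CALL_SITES
//   #include <hpx/runtime/threads/coroutines/detail/context_linux_x86.hpp>
//
// With the macro defined, the yield_hint overload of swap_context at the
// bottom of this file falls back to the single swapcontext_stack entry point.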

#if defined(__x86_64__)
extern "C" void swapcontext_stack (void***, void**) throw();
extern "C" void swapcontext_stack2 (void***, void**) throw();
extern "C" void swapcontext_stack3 (void***, void**) throw();
#else
extern "C" void swapcontext_stack (void***, void**) throw() __attribute((regparm(2)));
extern "C" void swapcontext_stack2 (void***, void**) throw() __attribute((regparm(2)));
extern "C" void swapcontext_stack3 (void***, void**) throw() __attribute((regparm(2)));
#endif

///////////////////////////////////////////////////////////////////////////////
namespace hpx { namespace threads { namespace coroutines
{
    // some platforms need special preparation of the main thread
    struct prepare_main_thread
    {
        prepare_main_thread() {}
        ~prepare_main_thread() {}
    };

    namespace detail { namespace lx
    {
        template <typename TO, typename FROM>
        TO nasty_cast(FROM f)
        {
            union {
                FROM f; TO t;
            } u;
            u.f = f;
            return u.t;
        }
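
        // Note: nasty_cast type-puns through a union because a direct
        // reinterpret_cast between function pointers and void* is only
        // conditionally supported in C++. The constructor below relies on it
        // to stash the trampoline address in the simulated stack frame, e.g.:
        //
        //   typedef void fun(Functor*);
        //   fun* funp = trampoline;                // function pointer
        //   void* slot = nasty_cast<void*>(funp);  // object pointer slot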

        template<typename T>
        HPX_FORCEINLINE void trampoline(T* fun);

        template<typename T>
        void trampoline(T* fun)
        {
            (*fun)();
            // If the coroutine body ever returns there is no caller frame to
            // return into, so falling through is a hard error.
            std::abort();
        }

        class x86_linux_context_impl;

        class x86_linux_context_impl_base : detail::context_impl_base
        {
        public:
            x86_linux_context_impl_base() {}

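            // __builtin_prefetch(addr, rw, locality): rw == 1 prefetches for
            // write, 0 for read; locality 3 asks to keep the line in all
            // cache levels. Touching the words around the saved stack pointer
            // warms the cache lines the resumed context will use first.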
            void prefetch() const
            {
#if defined(__x86_64__)
                HPX_ASSERT(sizeof(void*) == 8);
#else
                HPX_ASSERT(sizeof(void*) == 4);
#endif

                __builtin_prefetch(m_sp, 1, 3);
                __builtin_prefetch(m_sp, 0, 3);
                __builtin_prefetch(static_cast<void**>(m_sp) + 64 / sizeof(void*), 1, 3);
                __builtin_prefetch(static_cast<void**>(m_sp) + 64 / sizeof(void*), 0, 3);
#if !defined(__x86_64__)
                __builtin_prefetch(static_cast<void**>(m_sp) + 32 / sizeof(void*), 1, 3);
                __builtin_prefetch(static_cast<void**>(m_sp) + 32 / sizeof(void*), 0, 3);
                __builtin_prefetch(static_cast<void**>(m_sp) - 32 / sizeof(void*), 1, 3);
                __builtin_prefetch(static_cast<void**>(m_sp) - 32 / sizeof(void*), 0, 3);
#endif
                __builtin_prefetch(static_cast<void**>(m_sp) - 64 / sizeof(void*), 1, 3);
                __builtin_prefetch(static_cast<void**>(m_sp) - 64 / sizeof(void*), 0, 3);
            }

            /**
             * Free function. Saves the current context in @p from
             * and restores the context in @p to.
             * @note This function is found by ADL.
             */
            friend void swap_context(x86_linux_context_impl_base& from,
                x86_linux_context_impl_base const& to, default_hint);

            friend void swap_context(x86_linux_context_impl_base& from,
                x86_linux_context_impl_base const& to, yield_hint);

        protected:
            void ** m_sp;
        };

        class x86_linux_context_impl : public x86_linux_context_impl_base
        {
        public:
            // default stack: four OS pages (EXEC_PAGESIZE from <sys/param.h>)
            enum { default_stack_size = 4 * EXEC_PAGESIZE };

            typedef x86_linux_context_impl_base context_impl_base;

            x86_linux_context_impl()
                : m_stack(nullptr)
            {}

            /**
             * Create a context that, when restored, invokes the given
             * Functor on a new stack. The stack size can optionally be
             * specified.
             */
            template<typename Functor>
            x86_linux_context_impl(Functor& cb, std::ptrdiff_t stack_size = -1)
              : m_stack_size(stack_size == -1
                  ? static_cast<std::ptrdiff_t>(default_stack_size)
                  : stack_size),
                m_stack(nullptr)
            {
                if (0 != (m_stack_size % EXEC_PAGESIZE))
                {
                    throw std::runtime_error(
                        boost::str(boost::format(
                            "stack size of %1% is not page aligned, page size is %2%")
                            % m_stack_size % EXEC_PAGESIZE));
                }

                if (0 >= m_stack_size)
                {
                    throw std::runtime_error(
                        boost::str(boost::format("stack size of %1% is invalid") %
                            m_stack_size));
                }

                m_stack = posix::alloc_stack(static_cast<std::size_t>(m_stack_size));
                HPX_ASSERT(m_stack);
                posix::watermark_stack(m_stack, static_cast<std::size_t>(m_stack_size));

                typedef void fun(Functor*);
                fun * funp = trampoline;

                m_sp = (static_cast<void**>(m_stack)
                    + static_cast<std::size_t>(m_stack_size) / sizeof(void*))
                    - context_size;

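                // Layout arithmetic sketch (assuming 4 KiB pages for
                // illustration, i.e. a 16 KiB default stack): on x86_64,
                // m_stack_size / sizeof(void*) == 2048 words, so m_sp starts
                // context_size == 14 words below the top of the stack. Per
                // the context_data tables below, swapcontext_stack restores
                // the saved registers from m_sp[0..7] and then "returns" into
                // the address at m_sp[funp_idx], i.e. trampoline<Functor>,
                // whose argument &cb sits at m_sp[cb_idx].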
                m_sp[backup_cb_idx] = m_sp[cb_idx] = &cb;
                m_sp[backup_funp_idx] = m_sp[funp_idx] = nasty_cast<void*>(funp);

#if defined(HPX_HAVE_VALGRIND) && !defined(NVALGRIND)
                {
                    void * eos = static_cast<char*>(m_stack) + m_stack_size;
                    m_sp[valgrind_id_idx] = reinterpret_cast<void*>(
                        VALGRIND_STACK_REGISTER(m_stack, eos));
                }
#endif
            }

            ~x86_linux_context_impl()
            {
                if (m_stack)
                {
#if defined(HPX_HAVE_VALGRIND) && !defined(NVALGRIND)
                    VALGRIND_STACK_DEREGISTER(
                        reinterpret_cast<std::size_t>(m_sp[valgrind_id_idx]));
#endif
                    posix::free_stack(m_stack, static_cast<std::size_t>(m_stack_size));
                }
            }

            // Return the size of the reserved stack address space.
            std::ptrdiff_t get_stacksize() const
            {
                return m_stack_size;
            }

            void reset_stack()
            {
                if (m_stack)
                {
                    if (posix::reset_stack(
                        m_stack, static_cast<std::size_t>(m_stack_size)))
                        increment_stack_unbind_count();
                }
            }

            void rebind_stack()
            {
                if (m_stack)
                {
                    increment_stack_recycle_count();

                    // On rebind, we initialize our stack to ensure a virgin stack
                    m_sp = (static_cast<void**>(m_stack)
                        + static_cast<std::size_t>(m_stack_size) / sizeof(void*))
                        - context_size;

                    m_sp[cb_idx] = m_sp[backup_cb_idx];
                    m_sp[funp_idx] = m_sp[backup_funp_idx];
                }
            }

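            // Remaining room between the current stack pointer and the low
            // end of the allocation (the stack grows downward, so m_stack is
            // its lowest address).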
            std::ptrdiff_t get_available_stack_space()
            {
                return get_stack_ptr() - reinterpret_cast<std::size_t>(m_stack) -
                    context_size;
            }

            typedef boost::atomic<std::int64_t> counter_type;

            static counter_type& get_stack_unbind_counter()
            {
                static counter_type counter(0);
                return counter;
            }

            static std::uint64_t get_stack_unbind_count(bool reset)
            {
                return util::get_and_reset_value(get_stack_unbind_counter(), reset);
            }

            static std::uint64_t increment_stack_unbind_count()
            {
                return ++get_stack_unbind_counter();
            }

            static counter_type& get_stack_recycle_counter()
            {
                static counter_type counter(0);
                return counter;
            }

            static std::uint64_t get_stack_recycle_count(bool reset)
            {
                return util::get_and_reset_value(get_stack_recycle_counter(), reset);
            }

            static std::uint64_t increment_stack_recycle_count()
            {
                return ++get_stack_recycle_counter();
            }

            friend void swap_context(x86_linux_context_impl_base& from,
                x86_linux_context_impl_base const& to, default_hint);

            friend void swap_context(x86_linux_context_impl_base& from,
                x86_linux_context_impl_base const& to, yield_hint);

            // global functions to be called for each OS-thread after it started
            // running and before it exits
            static void thread_startup(char const* /*thread_type*/)
            {}

            static void thread_shutdown()
            {}

        private:
#if defined(__x86_64__)
            /** structure of context_data:
             * 13: backup address of function to execute
             * 12: backup address of trampoline
             * 11: additional alignment (or valgrind_id if enabled)
             * 10: parm 0 of trampoline
             * 9:  dummy return address for trampoline
             * 8:  return addr (here: start addr)
             * 7:  rbp
             * 6:  rbx
             * 5:  rsi
             * 4:  rdi
             * 3:  r12
             * 2:  r13
             * 1:  r14
             * 0:  r15
             **/
#if defined(HPX_HAVE_VALGRIND) && !defined(NVALGRIND)
            static const std::size_t valgrind_id_idx = 11;
#endif

            static const std::size_t context_size = 14;
            static const std::size_t backup_cb_idx = 13;
            static const std::size_t backup_funp_idx = 12;
            static const std::size_t cb_idx = 10;
            static const std::size_t funp_idx = 8;
#else
            /** structure of context_data:
             * 9: valgrind_id (if enabled)
             * 8: backup address of function to execute
             * 7: backup address of trampoline
             * 6: parm 0 of trampoline
             * 5: dummy return address for trampoline
             * 4: return addr (here: start addr)
             * 3: ebp
             * 2: ebx
             * 1: esi
             * 0: edi
             **/
#if defined(HPX_HAVE_VALGRIND) && !defined(NVALGRIND)
            static const std::size_t context_size = 10;
            static const std::size_t valgrind_id_idx = 9;
#else
            static const std::size_t context_size = 9;
#endif

            static const std::size_t backup_cb_idx = 8;
            static const std::size_t backup_funp_idx = 7;
            static const std::size_t cb_idx = 6;
            static const std::size_t funp_idx = 4;
#endif

            std::ptrdiff_t m_stack_size;
            void* m_stack;
        };

        typedef x86_linux_context_impl context_impl;
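
        // Usage sketch (illustrative only; my_task is a hypothetical functor,
        // and the hint tags come from swap_context.hpp):
        //
        //   struct my_task { void operator()() { /* coroutine body */ } };
        //   my_task task;
        //   x86_linux_context_impl callee(task);  // fresh coroutine stack
        //   x86_linux_context_impl_base caller;   // receives the saved state
        //   swap_context(caller, callee, default_hint());
        //
        // The first swap "returns" into trampoline<my_task>, which invokes
        // task(); yielding back requires a matching swap_context from inside
        // the coroutine (typically with yield_hint).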

        /**
         * Free function. Saves the current context in @p from
         * and restores the context in @p to.
         * @note This function is found by ADL.
         */
        inline void swap_context(x86_linux_context_impl_base& from,
            x86_linux_context_impl_base const& to, default_hint)
        {
            //        HPX_ASSERT(*(void**)to.m_stack == (void*)~0);
            to.prefetch();
            swapcontext_stack(&from.m_sp, to.m_sp);
        }

        inline void swap_context(x86_linux_context_impl_base& from,
            x86_linux_context_impl_base const& to, yield_hint)
        {
            //        HPX_ASSERT(*(void**)from.m_stack == (void*)~0);
            to.prefetch();
#ifndef HPX_COROUTINE_NO_SEPARATE_CALL_SITES
            swapcontext_stack2(&from.m_sp, to.m_sp);
#else
            swapcontext_stack(&from.m_sp, to.m_sp);
#endif
        }
    }}
}}}

#if defined(HPX_HAVE_VALGRIND)
#if defined(__GNUG__) && !defined(__INTEL_COMPILER)
#if defined(HPX_GCC_DIAGNOSTIC_PRAGMA_CONTEXTS)
#pragma GCC diagnostic pop
#endif
#endif
#endif

#else

#error This header can only be included when compiling for linux systems.

#endif

#endif /*HPX_RUNTIME_THREADS_COROUTINES_DETAIL_CONTEXT_LINUX_HPP*/
