Rework of execution policy (multithreading)

2025-03-10 00:10:10 +01:00 · 2020-11-06 17:30:24 +01:00 · 2020-11-06 17:30:24 +01:00 · 08d069ce32
commit 08d069ce32
parent 03c454951e
5 changed files with 135 additions and 24 deletions
--- a/PdfForQtLib/sources/pdfexecutionpolicy.cpp
+++ b/PdfForQtLib/sources/pdfexecutionpolicy.cpp
@ -19,13 +19,26 @@
 #include "pdfexecutionpolicy.h"

 #include <QThread>
+#include <QApplication>

 namespace pdf
 {

 struct PDFExecutionPolicyHolder
 {
+    PDFExecutionPolicyHolder() // NOTE(review): runs during static initialization of s_execution_policy - confirm this is safe before QCoreApplication exists
+    {
+        qAddPostRoutine(&PDFExecutionPolicy::finalize); // Register finalize() as a Qt post routine so both pools are drained at application teardown. NOTE(review): qAddPostRoutine is declared in <QCoreApplication>; confirm the <QApplication> include is intended
+    }
+    ~PDFExecutionPolicyHolder() // Blocks until both pools are idle, auxiliary first
+    {
+        auxiliary.waitForDone();
+        primary.waitForDone();
+    }
+
    PDFExecutionPolicy policy;
+    QThreadPool primary; // Pool returned by getThreadPool() for Scope::Page and Scope::Unknown
+    QThreadPool auxiliary; // Pool returned by getThreadPool() for Scope::Content
 } s_execution_policy;

 void PDFExecutionPolicy::setStrategy(Strategy strategy)
@ -50,13 +63,7 @@ bool PDFExecutionPolicy::isParallelizing(Scope scope)
                    return true; // We are parallelizing pages...

                case Scope::Content:
-                {
-                    // Jakub Melka: this is a bit complicated. We must count number of content streams
-                    // being processed and if it is large enough, then do not parallelize.
-                    const size_t threadLimit = s_execution_policy.policy.m_threadLimit.load(std::memory_order_relaxed);
-                    const size_t contentStreamsCount = s_execution_policy.policy.m_contentStreamsCount.load(std::memory_order_seq_cst);
-                    return contentStreamsCount < threadLimit;
-                }
+                    return false;
            }

            break;
@ -71,6 +78,34 @@ bool PDFExecutionPolicy::isParallelizing(Scope scope)
    return false;
 }

+int PDFExecutionPolicy::getActiveThreadCount(Scope scope) // Number of currently active threads in the pool serving 'scope'
+{
+    return getThreadPool(scope)->activeThreadCount(); // NOTE(review): getThreadPool() returns nullptr for an unhandled scope; the result is dereferenced unchecked
+}
+
+int PDFExecutionPolicy::getMaxThreadCount(Scope scope) // Thread limit of the pool serving 'scope'
+{
+    return getThreadPool(scope)->maxThreadCount(); // NOTE(review): getThreadPool() returns nullptr for an unhandled scope; the result is dereferenced unchecked
+}
+
+void PDFExecutionPolicy::setMaxThreadCount(Scope scope, int count) // Sets the thread limit of the pool serving 'scope'
+{
+    // Clamp the requested count to a minimum of 1 before handing it to the pool
+    count = qMax(count, 1);
+    getThreadPool(scope)->setMaxThreadCount(count);
+}
+
+int PDFExecutionPolicy::getIdealThreadCount(Scope scope) // Ideal thread count as reported by QThread
+{
+    Q_UNUSED(scope); // The scope does not influence the result
+    return QThread::idealThreadCount();
+}
+
+int PDFExecutionPolicy::getContentStreamCount() // Current value of the content stream counter
+{
+    return s_execution_policy.policy.m_contentStreamsCount.load(std::memory_order_relaxed); // Relaxed load: a snapshot only, no ordering with other memory operations
+}
+
 void PDFExecutionPolicy::startProcessingContentStream()
 {
    ++s_execution_policy.policy.m_contentStreamsCount;
@ -81,9 +116,33 @@ void PDFExecutionPolicy::endProcessingContentStream()
    --s_execution_policy.policy.m_contentStreamsCount;
 }

+void PDFExecutionPolicy::finalize() // Blocks until both thread pools have finished; registered as a Qt post routine by PDFExecutionPolicyHolder
+{
+    s_execution_policy.auxiliary.waitForDone();
+    s_execution_policy.primary.waitForDone();
+}
+
+QThreadPool* PDFExecutionPolicy::getThreadPool(PDFExecutionPolicy::Scope scope) // Maps a scope to the pool that runs its tasks
+{
+    switch (scope)
+    {
+        case Scope::Page:
+        case Scope::Unknown: // Page and Unknown share the primary pool
+            return &s_execution_policy.primary;
+
+        case Scope::Content: // Content work gets its own pool
+            return &s_execution_policy.auxiliary;
+
+        default: // Unhandled scope value
+            Q_ASSERT(false);
+            break;
+    }
+
+    return nullptr; // Reached only via the default branch; the callers shown here dereference the result without checking
+}
+
 PDFExecutionPolicy::PDFExecutionPolicy() :
    m_contentStreamsCount(0),
-    m_threadLimit(QThread::idealThreadCount()),
    m_strategy(Strategy::PageMultithreaded)
 {

--- a/PdfForQtLib/sources/pdfexecutionpolicy.h
+++ b/PdfForQtLib/sources/pdfexecutionpolicy.h
@ -20,6 +20,9 @@

 #include "pdfglobal.h"

+#include <QSemaphore>
+#include <QThreadPool>
+
 #include <atomic>
 #include <execution>

@ -53,15 +56,48 @@ public:
    static void setStrategy(Strategy strategy);

    /// Determines, if we should parallelize for scope
-    /// \param scope Scope for which we want to determine exectution policy
+    /// \param scope Scope for which we want to determine execution policy
    static bool isParallelizing(Scope scope);

+    template<typename ForwardIt, typename UnaryFunction>
+    class Runnable : public QRunnable // Pool task that applies a functor to one element and then releases a semaphore
+    {
+    public:
+        explicit inline Runnable(ForwardIt it, UnaryFunction* function, QSemaphore* semaphore) : // function and semaphore are stored as raw, non-owning pointers
+            m_forwardIt(qMove(it)),
+            m_function(function),
+            m_semaphore(semaphore)
+        {
+            setAutoDelete(true); // The thread pool deletes the task after run() returns
+        }
+
+        virtual void run() override
+        {
+            QSemaphoreReleaser semaphoreReleaser(m_semaphore); // Releases the semaphore once when this scope is left
+            (*m_function)(*m_forwardIt);
+        }
+
+    private:
+        ForwardIt m_forwardIt; // Iterator to the element this task processes
+        UnaryFunction* m_function; // Functor to apply; not owned
+        QSemaphore* m_semaphore; // Completion signal; not owned
+    };
+
    template<typename ForwardIt, typename UnaryFunction>
    static void execute(Scope scope, ForwardIt first, ForwardIt last, UnaryFunction f)
    {
        if (isParallelizing(scope))
        {
-            std::for_each(std::execution::parallel_policy(), first, last, f);
+            QSemaphore semaphore(0);
+            int count = static_cast<int>(std::distance(first, last));
+
+            QThreadPool* pool = getThreadPool(scope);
+            for (auto it = first; it != last; ++it)
+            {
+                pool->start(new Runnable(it, &f, &semaphore));
+            }
+
+            semaphore.acquire(count);
        }
        else
        {
@ -72,29 +108,45 @@ public:
    template<typename ForwardIt, typename Comparator>
    static void sort(Scope scope, ForwardIt first, ForwardIt last, Comparator f)
    {
-        if (isParallelizing(scope))
-        {
-            std::sort(std::execution::parallel_policy(), first, last, f);
-        }
-        else
-        {
-            std::sort(std::execution::sequenced_policy(), first, last, f);
-        }
+        Q_UNUSED(scope);
+
+        // Sorting always runs on a single thread, regardless of scope
+        std::sort(std::execution::sequenced_policy(), first, last, f);
    }

+    /// Returns number of active threads for given scope
+    static int getActiveThreadCount(Scope scope);
+
+    /// Returns maximal number of threads for given scope
+    static int getMaxThreadCount(Scope scope);
+
+    /// Sets maximal number of threads for given scope
+    static void setMaxThreadCount(Scope scope, int count);
+
+    /// Returns the ideal thread count (QThread::idealThreadCount()); the scope argument is currently ignored
+    static int getIdealThreadCount(Scope scope);
+
+    /// Returns number of currently processed content streams
+    static int getContentStreamCount();
+
    /// Starts processing content stream
    static void startProcessingContentStream();

    /// Ends processing content stream
    static void endProcessingContentStream();

+    /// Finalizes multithreading by waiting for both thread pools to finish - must be called at the end of the program
+    static void finalize();
+
 private:
    friend struct PDFExecutionPolicyHolder;

+    /// Returns thread pool based on scope
+    static QThreadPool* getThreadPool(Scope scope);
+
    explicit PDFExecutionPolicy();

-    std::atomic<size_t> m_contentStreamsCount;
-    std::atomic<size_t> m_threadLimit;
+    std::atomic<int> m_contentStreamsCount;
    std::atomic<Strategy> m_strategy;
 };

--- a/PdfForQtViewer/pdfviewersettings.cpp
+++ b/PdfForQtViewer/pdfviewersettings.cpp
@ -253,7 +253,7 @@ PDFViewerSettings::Settings::Settings() :
    m_thumbnailsCacheLimit(PIXMAP_CACHE_LIMIT),
    m_fontCacheLimit(pdf::DEFAULT_FONT_CACHE_LIMIT),
    m_instancedFontCacheLimit(pdf::DEFAULT_REALIZED_FONT_CACHE_LIMIT),
-    m_multithreadingStrategy(pdf::PDFExecutionPolicy::Strategy::PageMultithreaded),
+    m_multithreadingStrategy(pdf::PDFExecutionPolicy::Strategy::AlwaysMultithreaded),
    m_speechRate(0.0),
    m_speechPitch(0.0),
    m_speechVolume(1.0),
--- a/PdfForQtViewer/pdfviewersettings.h
+++ b/PdfForQtViewer/pdfviewersettings.h
@ -57,7 +57,7 @@ public:
        pdf::PDFReal m_colorTolerance;
        bool m_allowLaunchApplications;
        bool m_allowLaunchURI;
-        pdf::PDFExecutionPolicy::Strategy m_multithreadingStrategy = pdf::PDFExecutionPolicy::Strategy::PageMultithreaded;
+        pdf::PDFExecutionPolicy::Strategy m_multithreadingStrategy;

        // Cache settings
        int m_compiledPageCacheLimit;
--- a/PdfForQtViewer/pdfviewersettingsdialog.ui
+++ b/PdfForQtViewer/pdfviewersettingsdialog.ui
@ -26,7 +26,7 @@
     <item>
      <widget class="QStackedWidget" name="stackedWidget">
       <property name="currentIndex">
-        <number>10</number>
+        <number>0</number>
       </property>
       <widget class="QWidget" name="enginePage">
        <layout class="QVBoxLayout" name="enginePageLayout">
@ -113,7 +113,7 @@
            <item>
             <widget class="QLabel" name="engineInfoLabel">
              <property name="text">
-               <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Select rendering method according to your needs. &lt;span style=&quot; font-weight:600;&quot;&gt;Software rendering&lt;/span&gt; is much slower than hardware accelerated rendering using &lt;span style=&quot; font-weight:600;&quot;&gt;OpenGL rendering&lt;/span&gt;, but it works when OpenGL is not available at your platform. OpenGL rendering is selected as default and is recommended.&lt;/p&gt;&lt;p&gt;OpenGL rendering uses&lt;span style=&quot; font-weight:600;&quot;&gt; multisample antialiasing (MSAA)&lt;/span&gt;, which provides good quality antialiasing. You can turn this feature on or off, but without antialiasing, bad quality image can occur. Samples count affect how much samples per pixel are considered to determine pixel color. It can be a value 1, 2, 4, 8, and 16. Most modern GPUs support at least value 8. Lower this value, if your GPU doesn't support the desired sample count.&lt;/p&gt;&lt;p&gt;&lt;span style=&quot; font-weight:600;&quot;&gt;Prefetch pages &lt;/span&gt;prefetches (pre-renders) pages next to currently viewed pages, to avoid flickering during scrolling. Prefetched pages are stored in the page cache.&lt;/p&gt;&lt;p&gt;&lt;span style=&quot; font-weight:600;&quot;&gt;Multithreading strategy &lt;/span&gt;defines how program will use CPU cores. Engine can use multiple cores. Strategy defines, how engine will use these cores. &lt;span style=&quot; font-weight:600;&quot;&gt;Single thread&lt;/span&gt; strategy uses only one CPU core for rendering page, and for some operations, they aren't parallelized, at the cost of more time needed for operation to be finished. But still, each page will use its own thread to be compiled/drawn. On the other side, there are two multithreading strategies, former is load balanced, latter uses maximum threads. 
Load balanced strategy tries to optimize number of threads to fit CPU cores, while maximum threads strategy will spawn as much threads as possible to process operations, which can be sometimes unoptimal.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
+               <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Select rendering method according to your needs. &lt;span style=&quot; font-weight:600;&quot;&gt;Software rendering&lt;/span&gt; is much slower than hardware accelerated rendering using &lt;span style=&quot; font-weight:600;&quot;&gt;OpenGL rendering&lt;/span&gt;, but it works when OpenGL is not available at your platform. OpenGL rendering is selected as default and is recommended.&lt;/p&gt;&lt;p&gt;OpenGL rendering uses&lt;span style=&quot; font-weight:600;&quot;&gt; multisample antialiasing (MSAA)&lt;/span&gt;, which provides good quality antialiasing. You can turn this feature on or off, but without antialiasing, bad quality image can occur. Samples count affect how much samples per pixel are considered to determine pixel color. It can be a value 1, 2, 4, 8, and 16. Most modern GPUs support at least value 8. Lower this value, if your GPU doesn't support the desired sample count.&lt;/p&gt;&lt;p&gt;&lt;span style=&quot; font-weight:600;&quot;&gt;Prefetch pages &lt;/span&gt;prefetches (pre-renders) pages next to currently viewed pages, to avoid flickering during scrolling. Prefetched pages are stored in the page cache.&lt;/p&gt;&lt;p&gt;&lt;span style=&quot; font-weight:600;&quot;&gt;Multithreading strategy &lt;/span&gt;defines how program will use CPU cores. Engine can use multiple cores. Strategy defines, how engine will use these cores. &lt;span style=&quot; font-weight:600;&quot;&gt;Single thread&lt;/span&gt; strategy uses only one CPU core for rendering page, and for some operations, they aren't parallelized, at the cost of more time needed for operation to be finished. But still, each page will use its own thread to be compiled/drawn. On the other side, there are two multithreading strategies, former is load balanced, latter uses maximum threads. 
Load balanced strategy parallelizes only pages, but not the processing of each individual page's content, while maximum threads strategy will spawn as many threads as possible to process operations to achieve best performance.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
              </property>
              <property name="wordWrap">
               <bool>true</bool>