diff --git a/src/JPEGView/JPEGImage.cpp b/src/JPEGView/JPEGImage.cpp index 5c465f06..298bb01d 100644 --- a/src/JPEGView/JPEGImage.cpp +++ b/src/JPEGView/JPEGImage.cpp @@ -14,10 +14,6 @@ #include #include -// Hacky workaround. Look at comment block in CJPEGImage::Resample() -// undefine this flag to investigate which optimization might cause that particular failure (TODO) -#define AVX_SSE_FREEZE_FALLBACK - /////////////////////////////////////////////////////////////////////////////////// // Static helpers /////////////////////////////////////////////////////////////////////////////////// @@ -573,29 +569,6 @@ void* CJPEGImage::Resample(CSize fullTargetSize, CSize clippingSize, CPoint targ EProcessingFlags eProcFlags, double dSharpen, double dRotation, EResizeType eResizeType) { Helpers::CPUType cpu = CSettingsProvider::This().AlgorithmImplementation(); - // NOTE: Hacky workaround... there is probably a very obscure bug in the AVX2 implementation - // which causes WaitForSingleObject to wait indefinitely on SampleUp_HQ_SIMD() - // when window dimensions are > 3224 pixels wide. - // - // This obscure bug ONLY manifests itself on RELEASE builds, and not DEBUG builds, meaning - // it happens only when optimization is turned on. I haven't had the chance to trial and error exactly - // which optimization flag causes the freeze, but that's a future TODO - // - // A lot of trial and error tests has a freeze happening at 3226 pixels wide, but not at 3224 pixels. (client rect pixels) - // (I was not able to test how this all behaves if it's 3224 pixels high, so, when displays get that cool, then we might have to revisit this hack) - // The AVX2 code is very advanced, and I don't have the expertise to debug it -sylikc - // - // The known workarounds based on GitHub issues is to either set CPUType=SSE or HighQualityResampling=false - // - // So, here, we detect and fallback to SSE when the conditions are met. To be safe, I set the limit at 3200 pixels -#ifdef AVX_SSE_FREEZE_FALLBACK - if (cpu == Helpers::CPU_AVX2 && clippingSize.cx > 3200) { - // only override the usage for SSE for these specific conditions - // AVX2 is supposed to be ~2.4x faster than SSE - cpu = Helpers::CPU_SSE; - } -#endif - EFilterType filter = CSettingsProvider::This().DownsamplingFilter(); if (fullTargetSize.cx > 65535 || fullTargetSize.cy > 65535) return NULL; diff --git a/src/JPEGView/ProcessingThreadPool.cpp b/src/JPEGView/ProcessingThreadPool.cpp index e683f89a..b216bdc8 100644 --- a/src/JPEGView/ProcessingThreadPool.cpp +++ b/src/JPEGView/ProcessingThreadPool.cpp @@ -78,7 +78,7 @@ bool CProcessingThreadPool::Process(CProcessingRequest* pRequest) { if (m_nNumThreads == 0) { CProcessingThread::DoProcess(pRequest, 0, nTargetCY); } else { - if (nTargetCX * nTargetCY < 100000 || nTargetCY <= 12) { + if (nTargetCX * nTargetCY < 100000 || nTargetCY < 2 * pRequest->StripPadding) { CProcessingThread::DoProcess(pRequest, 0, nTargetCY); } else { // Important: All slices must have a height dividable by 'StripPadding', except the last one @@ -87,6 +87,7 @@ bool CProcessingThreadPool::Process(CProcessingRequest* pRequest) { while ((nSliceCY = ~(pRequest->StripPadding - 1) & (nTargetCY / nNumThreadsUsed)) < pRequest->StripPadding) { nNumThreadsUsed--; } + assert(nNumThreadsUsed > 1); int nLastCY = nTargetCY - (nNumThreadsUsed - 1)*nSliceCY; volatile LONG nRequestThreadCounter = nNumThreadsUsed - 1; int nCurrCY = 0;