Skip to content

Commit 82b397a

Browse files
committed
Merge remote-tracking branch 'origin/candidate-9.14.x'
Signed-off-by: Gavin Halliday <[email protected]> # Conflicts: # helm/hpcc/Chart.yaml # helm/hpcc/templates/_helpers.tpl # version.cmake
2 parents 53f0285 + 062ceb5 commit 82b397a

File tree

11 files changed

+134
-15
lines changed

11 files changed

+134
-15
lines changed

common/thorhelper/thorsoapcall.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2835,6 +2835,7 @@ class CWSCAsyncFor : implements IWSCAsyncFor, public CInterface, public CAsyncFo
28352835
break;
28362836
}
28372837
numRetries++;
2838+
master->logctx.noteStatistic(StNumSoapcallRetries, 1);
28382839
master->logctx.CTXLOG("Retrying: attempt %d of %d", numRetries, master->maxRetries);
28392840
master->activitySpanScope->recordException(e, false, false);
28402841
e->Release();

dali/base/dautils.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3750,7 +3750,7 @@ class FileReadPropertiesUpdater : public CSimpleInterfaceOf<IFileReadPropertiesU
37503750
}
37513751

37523752
public:
3753-
FileReadPropertiesUpdater(IUserDescriptor * udesc) : udesc(udesc) {}
3753+
FileReadPropertiesUpdater(IUserDescriptor * _udesc) : udesc(_udesc) {}
37543754

37553755
// Track and accumulate the readCost and numDiskReads to stats tracking map (to be written to properties later)
37563756
// - if curReadCost is 0, it will be calculated using calcFileAccessCost

ecl/hthor/hthor.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11042,10 +11042,6 @@ CHThorNewDiskReadBaseActivity::InputFileInfo * CHThorNewDiskReadBaseActivity::ex
1104211042
queryInheritSeparatorProp(*fileFormatOptions, "separator", options, "@csvSeparate");
1104311043
queryInheritProp(*fileFormatOptions, "terminator", options, "@csvTerminate");
1104411044
queryInheritProp(*fileFormatOptions, "escape", options, "@csvEscape");
11045-
11046-
//MORE: Remove before this is merged!
11047-
dbglogXML(fileFormatOptions);
11048-
dbglogXML(fileProviderOptions);
1104911045
}
1105011046

1105111047
fileProviderOptions->setPropBool("@grouped", grouped);

roxie/ccd/ccdmain.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -885,6 +885,18 @@ int CCD_API roxie_main(int argc, const char *argv[], const char * defaultYaml)
885885
}
886886
#endif
887887

888+
//Add roxieMode configuration option, which can be used to set the defaults for various other options
889+
bool isBatchRoxie = false;
890+
const char * roxieMode = topology->queryProp("@roxieMode");
891+
if (roxieMode && strieq(roxieMode, "batch"))
892+
isBatchRoxie = true;
893+
894+
//Configure defaults based on whether the system is being used for batch or interactive queries
895+
if (isBatchRoxie)
896+
{
897+
acknowledgeAllRequests = false;
898+
}
899+
888900
if (!topology->hasProp("@resolveLocally"))
889901
topology->setPropBool("@resolveLocally", !topology->hasProp("@daliServers"));
890902

@@ -1361,6 +1373,15 @@ int CCD_API roxie_main(int argc, const char *argv[], const char * defaultYaml)
13611373
setNodeFetchThresholdNs(topology->getPropInt64("@nodeFetchThresholdNs"));
13621374
setIndexWarningThresholds(topology);
13631375

1376+
//Enabling the localNVMeCache will also by default enable remote file related optimizations
1377+
offset_t localNVMeCacheSize = topology->getPropInt64("@localNVMeCacheSize", 0);
1378+
bool usingRemoteStorage = (localNVMeCacheSize != 0);
1379+
1380+
unsigned inplaceSizeFactor = topology->getPropInt("@inplaceSizeFactor", usingRemoteStorage ? 0 : 100);
1381+
unsigned lz4SpeedFactor = topology->getPropInt("@lz4SpeedFactor", usingRemoteStorage ? 0 : 600); // If lz4 is 5x faster then value should be 500
1382+
unsigned zStdSpeedFactor = topology->getPropInt("@zStdSpeedFactor", usingRemoteStorage ? 0 : 350);
1383+
setIndexScaling(inplaceSizeFactor, lz4SpeedFactor, zStdSpeedFactor);
1384+
13641385
unsigned __int64 affinity = topology->getPropInt64("@affinity", 0);
13651386
updateAffinity(affinity);
13661387

roxie/ccd/ccdserver.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,7 @@ static const StatisticsMapping indexStatistics({StNumServerCacheHits, StNumIndex
500500
static const StatisticsMapping diskStatistics({StNumServerCacheHits, StNumDiskRowsRead, StNumDiskSeeks, StNumDiskAccepted,
501501
StNumDiskRejected, StSizeAgentReply, StTimeAgentWait, StTimeAgentQueue, StTimeAgentProcess, StTimeIBYTIDelay, StNumAckRetries, StNumAgentRequests, StSizeAgentRequests,
502502
StSizeContinuationData, StNumContinuationRequests }, actStatistics);
503-
static const StatisticsMapping soapStatistics({ StTimeSoapcall, StTimeSoapcallDNS, StTimeSoapcallConnect, StNumSoapcallConnectFailures }, actStatistics);
503+
static const StatisticsMapping soapStatistics({ StTimeSoapcall, StTimeSoapcallDNS, StTimeSoapcallConnect, StNumSoapcallConnectFailures, StNumSoapcallRetries }, actStatistics);
504504
static const StatisticsMapping groupStatistics({ StNumGroups, StNumGroupMax }, actStatistics);
505505
static const StatisticsMapping sortStatistics({ StTimeSortElapsed }, actStatistics);
506506
static const StatisticsMapping indexWriteStatistics({ StNumDuplicateKeys, StNumLeafCacheAdds, StNumNodeCacheAdds, StNumBlobCacheAdds }, actStatistics);
@@ -523,7 +523,7 @@ extern const StatisticsMapping accumulatedStatistics({StWhenFirstRow, StTimeLoca
523523
StNumBloomAccepts, StNumBloomRejects, StNumBloomSkips,
524524
StNumNodeDiskFetches, StNumLeafDiskFetches, StNumBlobDiskFetches,
525525
StNumDiskRejected, StSizeAgentReply, StTimeAgentWait,
526-
StTimeSoapcall, StTimeSoapcallDNS, StTimeSoapcallConnect, StNumSoapcallConnectFailures,
526+
StTimeSoapcall, StTimeSoapcallDNS, StTimeSoapcallConnect, StNumSoapcallConnectFailures, StNumSoapcallRetries,
527527
StNumGroups,
528528
StTimeSortElapsed,
529529
StNumDuplicateKeys,

system/jhtree/jhinplace.cpp

Lines changed: 100 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,36 @@
3939
//#define TRACE_BUILDING_STATS
4040
#endif
4141

42+
// A setting that can be scaled from 0 to 100 to track how the extra in memory saving from inplace indexes affects the performance
43+
// of roxie. Memory allocated = newSize + (oldSize - newSize) * inplaceSizeFactor/100;
44+
static unsigned inplaceSizeFactor = 0;
45+
46+
// Some settings that can be used to scale the time taken to expand nodes using the new inplace indexes.
47+
//
48+
// EstimatedTime = (TimeTakenToExpand / newPayloadSize) * oldSize * XXXSpeedFactor /100
49+
//
50+
// i.e. Estimate the time it would have taken to expand the original payload, assuming lzw compression
51+
// takes speedFactor/100 times longer than the new compression.
52+
//
53+
// This should also be used in the upcoming hybrid compression (e.g. hybrid:zstd) which uses the key compression for branches,
54+
// but the legacy compression format for leaves.
55+
//
56+
// A factor < 100 may still add time if there is a noticeable reduction in the payload data size.
57+
58+
static unsigned lz4SpeedFactor = 0;
59+
static unsigned zStdSpeedFactor = 0;
60+
static bool adjustExpansionTime = false;
61+
62+
void setIndexScaling(unsigned _inplaceSizeFactor, unsigned _lz4SpeedFactor, unsigned _zStdSpeedFactor)
63+
{
64+
inplaceSizeFactor = _inplaceSizeFactor;
65+
lz4SpeedFactor = _lz4SpeedFactor;
66+
zStdSpeedFactor = _zStdSpeedFactor;
67+
adjustExpansionTime = (lz4SpeedFactor != 0) || (zStdSpeedFactor != 0);
68+
}
69+
70+
//---------------------------------------------------------------------------------------------------------------------
71+
4272
static constexpr size32_t minRepeatCount = 2; // minimum number of times a 0x00 or 0x20 is repeated to generate a special opcode
4373
static constexpr size32_t minRepeatXCount = 3; // minimum number of times a byte is repeated to generate a special opcode
4474
static constexpr size32_t maxQuotedCount = 31 + 256; // maximum number of characters that can be quoted in a row
@@ -1665,6 +1695,8 @@ int CJHInplaceTreeNode::locateGT(const char * search, unsigned minIndex) const
16651695
}
16661696

16671697

1698+
constexpr static bool traceInplaceLoadStats = false;
1699+
16681700
void CJHInplaceTreeNode::load(CKeyHdr *_keyHdr, const void *rawData, offset_t _fpos, bool needCopy)
16691701
{
16701702
CJHSearchNode::load(_keyHdr, rawData, _fpos, needCopy);
@@ -1675,20 +1707,29 @@ void CJHInplaceTreeNode::load(CKeyHdr *_keyHdr, const void *rawData, offset_t _f
16751707

16761708
const byte * nullRow = nullptr; //MORE: This should be implemented
16771709
unsigned numKeys = hdr.numKeys;
1710+
unsigned originalKeyedSize = keyCompareLen * numKeys;
16781711
if (numKeys)
16791712
{
1713+
// only time the follow code if we are going to try and match the old timing.
1714+
CCycleTimer expansionTimer(traceInplaceLoadStats || (adjustExpansionTime && isLeaf()));
1715+
16801716
size32_t len = hdr.keyBytes;
16811717
size32_t copyLen = len;
16821718
const byte * originalData = ((const byte *)rawData) + sizeof(hdr);
16831719
const byte * originalPayload = nullptr;
16841720
bool keepCompressedPayload = true;
1721+
size32_t actualKeyedSize = len;
1722+
1723+
//For branches payloadCompression will always be COMPRESS_METHOD_NONE
1724+
CompressionMethod payloadCompression = COMPRESS_METHOD_NONE;
16851725
if (isLeaf())
16861726
{
16871727
//Always calculate payload location so we can perform a consistency check later on.
16881728
originalPayload = queryPayload(originalData);
16891729
if (originalPayload)
16901730
{
1691-
CompressionMethod payloadCompression = (CompressionMethod)*originalPayload;
1731+
actualKeyedSize = originalPayload - originalData;
1732+
payloadCompression = (CompressionMethod)*originalPayload;
16921733
switch (payloadCompression)
16931734
{
16941735
case COMPRESS_METHOD_NONE:
@@ -1699,19 +1740,27 @@ void CJHInplaceTreeNode::load(CKeyHdr *_keyHdr, const void *rawData, offset_t _f
16991740
expandPayloadOnDemand = dynamicPayloadExpansion;
17001741
keepCompressedPayload = expandPayloadOnDemand;
17011742
if (!expandPayloadOnDemand)
1702-
copyLen = (originalPayload - originalData);
1743+
copyLen = actualKeyedSize;
17031744
break;
17041745
}
17051746
}
17061747
else
1748+
{
1749+
//if no payload then actualKeySize is already set....
17071750
expandPayloadOnDemand = false;
1751+
}
17081752
}
17091753

1710-
const size32_t padding = 8 - 1; // Ensure that unsigned8 values can be read "legally"
1754+
size32_t padding = 8 - 1; // Ensure that unsigned8 values can be read "legally"
1755+
1756+
//Allow the memory used by the inplace indexes to be adjusted to explore the trend of the benefits
1757+
if (inplaceSizeFactor && (actualKeyedSize < originalKeyedSize))
1758+
padding += (originalKeyedSize - actualKeyedSize) * inplaceSizeFactor / 100;
1759+
17111760
keyBuf = (char *) allocMem(copyLen + padding);
17121761
memcpy(keyBuf, originalData, copyLen);
17131762
memset(keyBuf+copyLen, 0, padding);
1714-
expandedSize = copyLen;
1763+
expandedSize = copyLen+padding;
17151764

17161765
/**** If any of the following code changes queryPayload() must also be changed. ******/
17171766

@@ -1814,6 +1863,53 @@ void CJHInplaceTreeNode::load(CKeyHdr *_keyHdr, const void *rawData, offset_t _f
18141863
assertex((data - (const byte *)keyBuf) == len);
18151864
else
18161865
assertex((data - originalData) == len);
1866+
1867+
//Branch nodes do not use LZW compression - so this will not attempt to adjust them.
1868+
if ((adjustExpansionTime || traceInplaceLoadStats) && (payloadCompression != COMPRESS_METHOD_NONE) && !expandPayloadOnDemand)
1869+
{
1870+
__uint64 timeTakenNs = expansionTimer.elapsedNs();
1871+
unsigned scaling = 0;
1872+
switch (payloadCompression)
1873+
{
1874+
case COMPRESS_METHOD_LZW:
1875+
case COMPRESS_METHOD_LZW_LITTLE_ENDIAN:
1876+
scaling = 100; // only adjustment will be due to not expanding the keyed portion.
1877+
break;
1878+
case COMPRESS_METHOD_LZ4:
1879+
case COMPRESS_METHOD_LZ4HC:
1880+
case COMPRESS_METHOD_LZ4S:
1881+
case COMPRESS_METHOD_LZ4SHC:
1882+
case COMPRESS_METHOD_LZ4HC3:
1883+
scaling = lz4SpeedFactor;
1884+
break;
1885+
case COMPRESS_METHOD_ZSTDS:
1886+
scaling = zStdSpeedFactor;
1887+
break;
1888+
}
1889+
1890+
if (scaling)
1891+
{
1892+
size32_t actualSizeExpanded = expandedSize - padding - actualKeyedSize; // How much data was expanded in the payload
1893+
size32_t originalSizeExpanded = actualSizeExpanded + originalKeyedSize; // How much data would there have been if the whole node was compressed?
1894+
1895+
if (actualSizeExpanded)
1896+
{
1897+
//This scaling will not work sensibly if keys are not expanded on demand.
1898+
// expected = (TimeTakenToExpand / newPayloadSize) * oldSize * XXXSpeedFactor /100;
1899+
// rearrange so the divisions happen last
1900+
__uint64 expectedTime = (timeTakenNs * originalSizeExpanded * scaling) / actualSizeExpanded / 100;
1901+
1902+
//Sanity check to avoid pathological times caused by context switches etc.
1903+
if (expectedTime > 500'000)
1904+
expectedTime = 500'000;
1905+
1906+
if (traceInplaceLoadStats)
1907+
DBGLOG("InplaceLoad: %s originalSize(%u) actualSize(%u), expectedTime(%llu), actualTime(%llu)", translateFromCompMethod(payloadCompression), originalSizeExpanded, actualSizeExpanded, expectedTime, timeTakenNs);
1908+
else if (expectedTime > timeTakenNs)
1909+
NanoSleep(expectedTime- timeTakenNs);
1910+
}
1911+
}
1912+
}
18171913
}
18181914
}
18191915

system/jhtree/jhtree.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,8 @@ extern jhtree_decl bool isIndexFile(IFile *filename);
299299

300300
extern jhtree_decl IIndexLookup *createIndexLookup(IKeyManager *keyManager);
301301

302+
extern jhtree_decl void setIndexScaling(unsigned _inplaceSizeFactor, unsigned _lz4SpeedFactor, unsigned _zStdSpeedFactor);
303+
302304
#define JHTREE_KEY_NOT_SORTED JHTREE_ERROR_START
303305
#define JHTREE_KEY_UNKNOWN_COMPRESSION (JHTREE_ERROR_START+1)
304306

system/jlib/jstatcodes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ enum StatisticKind
360360
StTimeQueryConsume,
361361
StCycleQueryConsumeCycles,
362362
StNumSuccesses,
363+
StNumSoapcallRetries,
363364
StMax,
364365

365366
//For any quantity there is potentially the following variants.

system/jlib/jstats.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,10 +1011,10 @@ static const constexpr StatisticMeta statsMetaData[StMax] = {
10111011
{ NUMSTAT(BloomRejects), "The number of times a bloom filter rejects an index lookup" },
10121012
{ NUMSTAT(BloomSkips), "The number of times a bloom filter cannot filter an index lookup" },
10131013
{ NUMSTAT(Accepts), "The number of items accepted for processing" },
1014-
{ NUMSTAT(Waits), "The number of times a component waits for a entry on a queue" },
1014+
{ NUMSTAT(Waits), "The number of times a component waits for an entry on a queue" },
10151015
{ TIMESTAT(Provision), "The total time spent provisioning a component" },
10161016
{ CYCLESTAT(Provision) },
1017-
{ COSTSTAT(Start), "The cost assocaiated with starting a component or operation" },
1017+
{ COSTSTAT(Start), "The cost associated with starting a component or operation" },
10181018
{ TIMESTAT(WaitSuccess), "The time waiting for an item on a queue, when an item was eventually received" },
10191019
{ CYCLESTAT(WaitSuccess) },
10201020
{ TIMESTAT(WaitFailure), "The time waiting for an item on a queue, when no item was received" },
@@ -1027,6 +1027,7 @@ static const constexpr StatisticMeta statsMetaData[StMax] = {
10271027
{ TIMESTAT(QueryConsume), "The total time spent consuming and processing a query input" },
10281028
{ CYCLESTAT(QueryConsume) },
10291029
{ NUMSTAT(Successes), "The number of times something was successful" },
1030+
{ NUMSTAT(SoapcallRetries), "The number of times a soapcall request retries" },
10301031
};
10311032

10321033
static MapStringTo<StatisticKind, StatisticKind> statisticNameMap(true);

thorlcr/thorutil/thormisc.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ static Owned<IMPtagAllocator> ClusterMPAllocator;
8080
// stat. mappings shared between master and slave activities
8181
const StatisticsMapping spillStatistics({StTimeSpillElapsed, StTimeSortElapsed, StNumSpills, StSizeSpillFile, StSizePeakTempDisk});
8282
const StatisticsMapping executeStatistics({StWhenFirstRow, StTimeElapsed, StTimeTotalExecute, StTimeLocalExecute, StTimeBlocked});
83-
const StatisticsMapping soapcallStatistics({StTimeSoapcall, StTimeSoapcallDNS, StTimeSoapcallConnect, StNumSoapcallConnectFailures});
83+
const StatisticsMapping soapcallStatistics({StTimeSoapcall, StTimeSoapcallDNS, StTimeSoapcallConnect, StNumSoapcallConnectFailures, StNumSoapcallRetries});
8484
const StatisticsMapping basicActivityStatistics({StNumParallelExecute, StTimeLookAhead}, executeStatistics, spillStatistics);
8585
const StatisticsMapping groupActivityStatistics({StNumGroups, StNumGroupMax}, basicActivityStatistics);
8686
const StatisticsMapping indexReadFileStatistics({}, diskReadRemoteStatistics, jhtreeCacheStatistics);

0 commit comments

Comments
 (0)