Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYSTEMDS-???] Java17 Vectorized LibMM #2216

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions bin/systemds
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ if [ $WORKER == 1 ]; then
print_out "# starting Federated worker on port $PORT"
CMD=" \
java $SYSTEMDS_STANDALONE_OPTS \
--add-modules=jdk.incubator.vector \
$LOG4JPROPFULL \
-jar $SYSTEMDS_JAR_FILE \
-w $PORT \
Expand All @@ -422,6 +423,7 @@ elif [ "$FEDMONITORING" == 1 ]; then
print_out "# starting Federated backend monitoring on port $PORT"
CMD=" \
java $SYSTEMDS_STANDALONE_OPTS \
--add-modules=jdk.incubator.vector \
$LOG4JPROPFULL \
-jar $SYSTEMDS_JAR_FILE \
-fedMonitoring $PORT \
Expand All @@ -433,6 +435,7 @@ elif [ $SYSDS_DISTRIBUTED == 0 ]; then
CMD=" \
java $SYSTEMDS_STANDALONE_OPTS \
$LOG4JPROPFULL \
--add-modules=jdk.incubator.vector \
-jar $SYSTEMDS_JAR_FILE \
-f $SCRIPT_FILE \
-exec $SYSDS_EXEC_MODE \
Expand All @@ -442,6 +445,7 @@ else
print_out "# Running script $SCRIPT_FILE distributed with opts: $*"
CMD=" \
spark-submit $SYSTEMDS_DISTRIBUTED_OPTS \
--add-modules=jdk.incubator.vector \
$SYSTEMDS_JAR_FILE \
-f $SCRIPT_FILE \
-exec $SYSDS_EXEC_MODE \
Expand Down
12 changes: 9 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
<!-- aws-java-sdk-bundle version should align with hadoop-aws version -->
<!-- aws-java-sdk-bundle.version>1.12.367</aws-java-sdk-bundle.version -->
<!-- Set java compile level via argument, ex: 1.8 1.9 10 11-->
<java.level>11</java.level>
<java.level>17</java.level>
<java.version>{java.level}</java.version>
<!-->Testing settings<!-->
<maven.test.skip>false</maven.test.skip>
Expand All @@ -77,6 +77,7 @@
<test-forkCount>1C</test-forkCount>
<rerun.failing.tests.count>2</rerun.failing.tests.count>
<jacoco.skip>false</jacoco.skip>
<doc.skip>false</doc.skip>
<jacoco.include>**</jacoco.include>
<automatedtestbase.outputbuffering>false</automatedtestbase.outputbuffering>
<argLine>-Xms3000m -Xmx3000m -Xmn300m</argLine>
Expand Down Expand Up @@ -345,6 +346,9 @@
<source>${java.level}</source>
<target>${java.level}</target>
<release>${java.level}</release>
<compilerArgs>
<arg>--add-modules=jdk.incubator.vector</arg>
</compilerArgs>
</configuration>
</plugin>

Expand All @@ -367,6 +371,7 @@
<systemPropertyVariables>
<log4j.configurationFile>file:src/test/resources/log4j.properties</log4j.configurationFile>
</systemPropertyVariables>
<argLine>--add-modules=jdk.incubator.vector</argLine>
</configuration>
</plugin>

Expand Down Expand Up @@ -875,9 +880,10 @@
<configuration>
<excludePackageNames>*.protobuf</excludePackageNames>
<notimestamp>true</notimestamp>
<failOnWarnings>true</failOnWarnings>
<failOnWarnings>false</failOnWarnings>
<quiet>true</quiet>
<skip>false</skip>
<additionalJOption>--add-modules=jdk.incubator.vector</additionalJOption>
<skip>${doc.skip}</skip>
<show>public</show>
<source>${java.level}</source>
</configuration>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
import java.util.List;
import java.util.concurrent.ExecutorService;

// import jdk.incubator.vector.DoubleVector;
// import jdk.incubator.vector.VectorSpecies;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorSpecies;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
Expand Down Expand Up @@ -75,7 +75,7 @@ public class ColGroupDDC extends APreAgg implements IMapToDataGroup {

protected final AMapToData _data;

// static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED;
static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED;

private ColGroupDDC(IColIndex colIndexes, IDictionary dict, AMapToData data, int[] cachedCounts) {
super(colIndexes, dict, cachedCounts);
Expand Down Expand Up @@ -611,16 +611,16 @@ private void identityRightDecompressingMult(MatrixBlock right, MatrixBlock ret,
final double[] b = right.getDenseBlockValues();
final double[] c = ret.getDenseBlockValues();
final int jd = right.getNumColumns();
final int vLen = 8;
final DoubleVector vVec = DoubleVector.zero(SPECIES);
final int vLen = SPECIES.length();
final int lenJ = cru - crl;
final int end = cru - (lenJ % vLen);
for(int i = rl; i < ru; i++) {
int k = _data.getIndex(i);
final int offOut = i * jd + crl;
final double aa = 1;
final int k_right = _colIndexes.get(k);
vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen);

vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec);
}
}

Expand All @@ -630,8 +630,8 @@ private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, i
final double[] c = ret.getDenseBlockValues();
final int kd = _colIndexes.size();
final int jd = right.getNumColumns();
// final DoubleVector vVec = DoubleVector.zero(SPECIES);
final int vLen = 8;
final DoubleVector vVec = DoubleVector.zero(SPECIES);
final int vLen = SPECIES.length();

final int blkzI = 32;
final int blkzK = 24;
Expand All @@ -647,32 +647,22 @@ private void defaultRightDecompressingMult(MatrixBlock right, MatrixBlock ret, i
for(int k = bk; k < bke; k++) {
final double aa = a[offi + k];
final int k_right = _colIndexes.get(k);
vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen);
vectMM(aa, b, c, end, jd, crl, cru, offOut, k_right, vLen, vVec);
}
}
}
}
}

final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k,
int vLen) {
// vVec = vVec.broadcast(aa);
final void vectMM(double aa, double[] b, double[] c, int endT, int jd, int crl, int cru, int offOut, int k, int vLen, DoubleVector vVec) {
vVec = vVec.broadcast(aa);
final int offj = k * jd;
final int end = endT + offj;
for(int j = offj + crl; j < end; j += vLen, offOut += vLen) {
// DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut);
// DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j);
// res = vVec.fma(bVec, res);
// res.intoArray(c, offOut);

c[offOut] += aa * b[j];
c[offOut + 1] += aa * b[j + 1];
c[offOut + 2] += aa * b[j + 2];
c[offOut + 3] += aa * b[j + 3];
c[offOut + 4] += aa * b[j + 4];
c[offOut + 5] += aa * b[j + 5];
c[offOut + 6] += aa * b[j + 6];
c[offOut + 7] += aa * b[j + 7];
DoubleVector res = DoubleVector.fromArray(SPECIES, c, offOut);
DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j);
res = vVec.fma(bVec, res);
res.intoArray(c, offOut);
}
for(int j = end; j < cru + offj; j++, offOut++) {
double bb = b[j];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import java.util.Arrays;
import java.util.Set;

import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorSpecies;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.sysds.runtime.compress.DMLCompressionException;
import org.apache.sysds.runtime.compress.colgroup.indexes.ArrayIndex;
Expand Down Expand Up @@ -65,6 +67,8 @@ public class MatrixBlockDictionary extends ADictionary {

final private MatrixBlock _data;

static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED;

/**
* Unsafe private constructor that does not check the data validity. USE WITH CAUTION.
*
Expand Down Expand Up @@ -2081,7 +2085,71 @@ private void preaggValuesFromDenseDictDenseAggArray(final int numVals, final ICo

private void preaggValuesFromDenseDictDenseAggRange(final int numVals, final IColIndex colIndexes, final int s,
final int e, final double[] b, final int cut, final double[] ret) {
preaggValuesFromDenseDictDenseAggRangeGeneric(numVals, colIndexes, s, e, b, cut, ret);
if(colIndexes instanceof RangeIndex) {
RangeIndex ri = (RangeIndex) colIndexes;
preaggValuesFromDenseDictDenseAggRangeRange(numVals, ri.get(0), ri.get(0) + ri.size(), s, e, b, cut, ret);
}
else
preaggValuesFromDenseDictDenseAggRangeGeneric(numVals, colIndexes, s, e, b, cut, ret);
}

private void preaggValuesFromDenseDictDenseAggRangeRange(final int numVals, final int ls, final int le, final int rs,
final int re, final double[] b, final int cut, final double[] ret) {
final int cz = le - ls;
final int az = re - rs;
// final int nCells = numVals * cz;
final double[] values = _data.getDenseBlockValues();
// Correctly named ikj matrix multiplication .

final int blkzI = 32;
final int blkzK = 24;
final int blkzJ = 1024;
for(int bi = 0; bi < numVals; bi += blkzI) {
final int bie = Math.min(numVals, bi + blkzI);
for(int bk = 0; bk < cz; bk += blkzK) {
final int bke = Math.min(cz, bk + blkzK);
for(int bj = 0; bj < az; bj += blkzJ) {
final int bje = Math.min(az, bj + blkzJ);
final int sOffT = rs + bj;
final int eOffT = rs + bje;
preaggValuesFromDenseDictBlockedIKJ(values, b, ret, bi, bk, bj, bie, bke, cz, az, ls, cut, sOffT, eOffT);
}
}
}
}

private static void preaggValuesFromDenseDictBlockedIKJ(double[] a, double[] b, double[] ret, int bi, int bk, int bj,
int bie, int bke, int cz, int az, int ls, int cut, int sOffT, int eOffT) {
final int vLen = SPECIES.length();
final DoubleVector vVec = DoubleVector.zero(SPECIES);
final int leftover = sOffT - eOffT % vLen; // leftover not vectorized
for(int i = bi; i < bie; i++) {
final int offI = i * cz;
final int offOutT = i * az + bj;
for(int k = bk; k < bke; k++) {
final int idb = (k + ls) * cut;
final int sOff = sOffT + idb;
final int eOff = eOffT + idb;
final double v = a[offI + k];
vecInnerLoop(v, b, ret, offOutT, eOff, sOff, leftover, vLen, vVec);
}
}
}

private static void vecInnerLoop(final double v, final double[] b, final double[] ret, final int offOutT,
final int eOff, final int sOff, final int leftover, final int vLen, DoubleVector vVec) {
int offOut = offOutT;
vVec = vVec.broadcast(v);
final int end = eOff - leftover;
for(int j = sOff; j < end; j += vLen, offOut += vLen) {
DoubleVector res = DoubleVector.fromArray(SPECIES, ret, offOut);
DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j);
vVec.fma(bVec, res).intoArray(ret, offOut);
}
for(int j = end; j < eOff; j++, offOut++) {
ret[offOut] += v * b[j];
}

}

private void preaggValuesFromDenseDictDenseAggRangeGeneric(final int numVals, final IColIndex colIndexes,
Expand Down
Loading
Loading