Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions H/HPTT/build_tarballs.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
using BinaryBuilder, Pkg

name = "HPTT"
version = v"1.0.5"

# Inputs to the build: the upstream HPTT repository pinned to a single commit,
# and the local `bundled` directory, which carries the patch applied by the
# build script.
sources = [
    GitSource(
        "https://github.com/springer13/hptt.git",
        "a55c2a927d5462e81abeb12081fd345024caf5f6",
    ),
    DirectorySource("./bundled"),
]

# Bash recipe for building across all platforms.
#
# Steps performed by the script below:
#   1. Apply `clang_compatibility.patch` (from the sources' `patches` directory)
#      with `atomic_patch -p1`.
#   2. Create `${libdir}` and `${includedir}`, then copy the HPTT headers into
#      `${includedir}`.
#   3. Assemble `CXXFLAGS`: a common base (`-O3 -std=c++11 -DNDEBUG -fopenmp -fPIC`),
#      plus `-mavx -DHPTT_ARCH_AVX` when `proc_family` is `intel`, or
#      `-DHPTT_ARCH_IBM -maltivec -mabi=altivec` when it is `power`. The ARM
#      branch is deliberately left commented out (see the note inside the script).
#   4. Compile every `src/*.cpp` into an object file next to its source, then
#      link all objects into the shared library `${libdir}/libhptt.$dlext`.
#   5. Install `LICENSE.txt` through `install_license`.
script = raw"""
atomic_patch -p1 $WORKSPACE/srcdir/patches/clang_compatibility.patch
mkdir -p ${libdir}
mkdir -p ${includedir}
export hpttdir=${WORKSPACE}/srcdir/hptt
cp ${hpttdir}/include/* ${includedir}
export CXXFLAGS="-O3 -std=c++11 -DNDEBUG -fopenmp -fPIC"
if [[ ${proc_family} == intel ]]; then
export CXXFLAGS="$CXXFLAGS -mavx -DHPTT_ARCH_AVX"
elif [[ ${proc_family} == power ]]; then
export CXXFLAGS="$CXXFLAGS -DHPTT_ARCH_IBM -maltivec -mabi=altivec";
## specific arm optimizations seem to be broken in library
# elif [[ ${target} == arm* ]]; then
# export CXXFLAGS="$CXXFLAGS -mfpu=neon -DHPTT_ARCH_ARM"
fi
for f in ${hpttdir}/src/*.cpp; do
$CXX $CXXFLAGS -I ${includedir} -c $f -o ${f%.cpp}.o
done
$CXX ${hpttdir}/src/*.o $CXXFLAGS -o ${libdir}/libhptt.$dlext -shared
install_license ${hpttdir}/LICENSE.txt
"""

# Default build targets: every supported platform, expanded over the C++ string
# ABIs. Further platforms can still be passed in on the command line.
platforms = expand_cxxstring_abis(supported_platforms())

# Artifacts that must exist after a successful build: the shared library
# `libhptt`, registered under the variable name `:libhptt`.
products = [LibraryProduct("libhptt", :libhptt)]

# Packages that must be installed before this one can be built.
# CompilerSupportLibraries_jll is attached to the non-BSD platforms and
# LLVMOpenMP_jll to the BSD ones, as classified by `Sys.isbsd`.
dependencies = [
    Dependency(
        PackageSpec(;
            name="CompilerSupportLibraries_jll",
            uuid="e66e0078-7015-5450-92f7-15fbd957f2ae",
        );
        platforms=filter(!Sys.isbsd, platforms),
    ),
    Dependency(
        PackageSpec(;
            name="LLVMOpenMP_jll",
            uuid="1d63c593-3942-5779-bab2-d838dc0a180e",
        );
        platforms=filter(Sys.isbsd, platforms),
    ),
]

# Build the tarballs (and possibly a `build.jl` as well) from the recipe pieces
# defined above; `ARGS` forwards the command-line arguments.
build_tarballs(
    ARGS,
    name,
    version,
    sources,
    script,
    platforms,
    products,
    dependencies;
    preferred_gcc_version=v"5.2.0",
    julia_compat="1.6",
)
40 changes: 40 additions & 0 deletions H/HPTT/bundled/patches/clang_compatibility.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
diff --git before/hptt/include/hptt_types.h after/hptt/include/hptt_types.h
index 170288e..ebc5796 100644
--- before/hptt/include/hptt_types.h
+++ after/hptt/include/hptt_types.h
@@ -1,7 +1,6 @@
#pragma once

#include <complex>
-#include <complex.h>

#define REGISTER_BITS 256 // AVX
#ifdef HPTT_ARCH_ARM
diff --git before/hptt/src/hptt.cpp after/hptt/src/hptt.cpp
index 82d4e73..3018664 100644
--- before/hptt/src/hptt.cpp
+++ after/hptt/src/hptt.cpp
@@ -180,8 +180,10 @@ void cTensorTranspose( const int *perm, const int dim,
const float _Complex beta, float _Complex *B, const int *outerSizeB,
const int numThreads, const int useRowMajor)
{
+ const hptt::FloatComplex* calpha = reinterpret_cast<const hptt::FloatComplex*>(&alpha);
+ const hptt::FloatComplex* cbeta = reinterpret_cast<const hptt::FloatComplex*>(&beta);
auto plan(std::make_shared<hptt::Transpose<hptt::FloatComplex> >(sizeA, perm, outerSizeA, outerSizeB, dim,
- (const hptt::FloatComplex*) A, (hptt::FloatComplex) alpha, (hptt::FloatComplex*) B, (hptt::FloatComplex) beta, hptt::ESTIMATE, numThreads, nullptr, useRowMajor));
+ (const hptt::FloatComplex*) A, *calpha, (hptt::FloatComplex*) B, *cbeta, hptt::ESTIMATE, numThreads, nullptr, useRowMajor));
plan->setConjA(conjA);
plan->execute();
}
@@ -191,8 +193,10 @@ void zTensorTranspose( const int *perm, const int dim,
const double _Complex beta, double _Complex *B, const int *outerSizeB,
const int numThreads, const int useRowMajor)
{
+ const hptt::DoubleComplex* calpha = reinterpret_cast<const hptt::DoubleComplex*>(&alpha);
+ const hptt::DoubleComplex* cbeta = reinterpret_cast<const hptt::DoubleComplex*>(&beta);
auto plan(std::make_shared<hptt::Transpose<hptt::DoubleComplex> >(sizeA, perm, outerSizeA, outerSizeB, dim,
- (const hptt::DoubleComplex*) A, (hptt::DoubleComplex) alpha, (hptt::DoubleComplex*) B, (hptt::DoubleComplex) beta, hptt::ESTIMATE, numThreads, nullptr, useRowMajor));
+ (const hptt::DoubleComplex*) A, *calpha, (hptt::DoubleComplex*) B, *cbeta, hptt::ESTIMATE, numThreads, nullptr, useRowMajor));
plan->setConjA(conjA);
plan->execute();
}