diff --git a/demo/customslots.pxd b/demo/customslots.pxd index 44140d0..2829dba 100644 --- a/demo/customslots.pxd +++ b/demo/customslots.pxd @@ -5,9 +5,9 @@ cdef extern from "customslots.h": void *pointer Py_ssize_t objoffset uintptr_t flags - + ctypedef struct PyCustomSlot: - uintrptr_t id + uintptr_t id pyx_data data int PyCustomSlots_Check(obj) diff --git a/extensibletype/extensibletype.pxd b/extensibletype/extensibletype.pxd new file mode 100644 index 0000000..6599e76 --- /dev/null +++ b/extensibletype/extensibletype.pxd @@ -0,0 +1,27 @@ +cdef extern from "pstdint.h": + ctypedef unsigned int uint32_t + ctypedef unsigned long long uint64_t + ctypedef unsigned short uint16_t + ctypedef unsigned char uint8_t + ctypedef uint64_t uintptr_t + +cdef extern from "perfecthash.h": + ctypedef struct PyCustomSlots_Entry: + uint64_t id + void *ptr + + ctypedef struct PyCustomSlots_Table: + uint64_t flags + uint64_t m_f, m_g + PyCustomSlots_Entry *entries + uint16_t n, b + uint8_t r + + ctypedef struct PyCustomSlots_Table_64_64: + PyCustomSlots_Table base + uint16_t d[64] + PyCustomSlots_Entry entries_mem[64] + + + int PyCustomSlots_PerfectHash(PyCustomSlots_Table *table, uint64_t *hashes) + diff --git a/extensibletype/extensibletype.pyx b/extensibletype/extensibletype.pyx index dd4eed2..268fdc9 100644 --- a/extensibletype/extensibletype.pyx +++ b/extensibletype/extensibletype.pyx @@ -1,42 +1,30 @@ cimport numpy as cnp import numpy as np -cdef extern from "stdint.h": - ctypedef unsigned int uint32_t - ctypedef unsigned long long uint64_t - ctypedef unsigned short uint16_t - ctypedef unsigned char uint8_t - ctypedef uint64_t uintptr_t +import hashlib cdef extern from "perfecthash.h": - ctypedef struct PyCustomSlots_Entry: - char *id - uintptr_t flags - void *ptr - - ctypedef struct PyCustomSlots_Table: - uint64_t flags - uint64_t m_f, m_g - PyCustomSlots_Entry *entries - uint16_t n, b - uint8_t r - - ctypedef struct PyCustomSlots_Table_64_64: - PyCustomSlots_Table base - uint16_t d[64] - PyCustomSlots_Entry entries_mem[64] - - - int PyCustomSlots_PerfectHash(PyCustomSlots_Table *table, uint64_t *hashes) void _PyCustomSlots_bucket_argsort(uint16_t *p, uint8_t *binsizes, - uint8_t *number_of_bins_by_size) + uint16_t *number_of_bins_by_size) def bucket_argsort(cnp.ndarray[uint16_t, mode='c'] p, cnp.ndarray[uint8_t, mode='c'] binsizes, - cnp.ndarray[uint8_t, mode='c'] number_of_bins_by_size): + cnp.ndarray[uint16_t, mode='c'] number_of_bins_by_size): _PyCustomSlots_bucket_argsort(&p[0], &binsizes[0], &number_of_bins_by_size[0]) +def get_random_hashes(rng, nitems): + return rng.randint(-2**31, 2**31-1, size=nitems).astype(np.uint64) + +def draw_hashes(rng, nitems): + assert sizeof(long) >= 4 + + hashes = get_random_hashes(rng, nitems) + hashes <<= 32 + hashes |= get_random_hashes(rng, nitems) + + return hashes + def perfect_hash(cnp.ndarray[uint64_t] hashes, int repeat=1): """Used for testing. Takes the hashes as input, and returns a permutation array and hash parameters: @@ -49,8 +37,7 @@ def perfect_hash(cnp.ndarray[uint64_t] hashes, int repeat=1): table.base.b = 64 table.base.entries = &table.entries_mem[0] for i in range(64): - table.entries_mem[i].id = NULL - table.entries_mem[i].flags = i + table.entries_mem[i].id = hashes[i] table.entries_mem[i].ptr = NULL cdef int r @@ -61,38 +48,38 @@ def perfect_hash(cnp.ndarray[uint64_t] hashes, int repeat=1): p = np.zeros(64, dtype=np.uint16) for i in range(64): - p[i] = table.entries_mem[i].flags + p[i] = table.entries_mem[i].id & 0xFF d[i] = table.d[i] return p, table.base.r, table.base.m_f, table.base.m_g, d -cdef extern from "md5sum.h": - ctypedef struct MD5_CTX: - uint32_t i[2] - uint32_t buf[4] - unsigned char in_ "in"[64] - unsigned char digest[16] - - void MD5Init(MD5_CTX *mdContext) - void MD5Update(MD5_CTX *mdContext, unsigned char *inBuf, - unsigned int inLen) - void MD5Final(MD5_CTX *mdContext) - -cdef extern from "hash.h": - uint64_t hash_crapwow64(unsigned char *buf, uint64_t len, uint64_t seed) - -def crapwowbench(int repeat=1): - cdef int r - cdef MD5_CTX ctx - for r in range(repeat): - hash_crapwow64("asdf", 4, 0xf123456781234567) - - -def md5bench(int repeat=1): - cdef int r - cdef MD5_CTX ctx - for r in range(repeat): - MD5Init(&ctx) - MD5Update(&ctx, "asdf", 4) - MD5Final(&ctx) +#cdef extern from "md5sum.h": +# ctypedef struct MD5_CTX: +# uint32_t i[2] +# uint32_t buf[4] +# unsigned char in_ "in"[64] +# unsigned char digest[16] +# +# void MD5Init(MD5_CTX *mdContext) +# void MD5Update(MD5_CTX *mdContext, unsigned char *inBuf, +# unsigned int inLen) +# void MD5Final(MD5_CTX *mdContext) +# +#cdef extern from "hash.h": +# uint64_t hash_crapwow64(unsigned char *buf, uint64_t len, uint64_t seed) +# +#def crapwowbench(int repeat=1): +# cdef int r +# cdef MD5_CTX ctx +# for r in range(repeat): +# hash_crapwow64("asdf", 4, 0xf123456781234567) +# +# +#def md5bench(int repeat=1): +# cdef int r +# cdef MD5_CTX ctx +# for r in range(repeat): +# MD5Init(&ctx) +# MD5Update(&ctx, "asdf", 4) +# MD5Final(&ctx) diff --git a/extensibletype/intern.pxd b/extensibletype/intern.pxd new file mode 100644 index 0000000..2128b68 --- /dev/null +++ b/extensibletype/intern.pxd @@ -0,0 +1,17 @@ +from extensibletype cimport * + +cdef extern from "Python.h": + ctypedef unsigned int Py_uintptr_t + +cdef extern from *: + ctypedef char *string_t "const char *" + +cdef extern from "globalinterning.h": + ctypedef void *intern_table_t + + intern_table_t *intern_create_table(intern_table_t *table) except NULL + void intern_destroy_table(intern_table_t *table) + uint64_t intern_key(intern_table_t *table, string_t key) except? 0 + + int PyIntern_Initialize() except -1 + uint64_t PyIntern_AddKey(string_t key) except? 0 diff --git a/extensibletype/intern.pyx b/extensibletype/intern.pyx new file mode 100644 index 0000000..76aeeff --- /dev/null +++ b/extensibletype/intern.pyx @@ -0,0 +1,20 @@ +def global_intern(bytes key): + return PyIntern_AddKey(key) + +def global_intern_initialize(): + PyIntern_Initialize() + +cdef class InternTable(object): + "Wrap intern tables (intern_table_t)" + + cdef intern_table_t _table + cdef intern_table_t *table + + def __init__(self): + self.table = intern_create_table(&self._table) + + def __dealloc__(self): + intern_destroy_table(self.table) + + def intern(self, bytes key): + return intern_key(self.table, key) diff --git a/extensibletype/methodtable.pyx b/extensibletype/methodtable.pyx new file mode 100644 index 0000000..04b367f --- /dev/null +++ b/extensibletype/methodtable.pyx @@ -0,0 +1,183 @@ +from libc cimport stdlib +cimport numpy as cnp +import numpy as np + +from extensibletype cimport * +from . import extensibletype + +import intern + +def roundup(x): + "Round up to a power of two" + x -= 1 + x |= x >> 1 + x |= x >> 2 + x |= x >> 4 + x |= x >> 8 + x |= x >> 16 + x |= x >> 32 + x += 1 + return x + +class HashingError(Exception): + """ + Raised when we can't create a perfect hash-based function table. + """ + +cdef PyCustomSlots_Table *allocate_hash_table(uint16_t size) except NULL: + cdef PyCustomSlots_Table *table + cdef uint16_t nbins + + size = roundup(size) + assert size * 4 <= 0xFFFF, hex(size) + nbins = size * 4 + + table = stdlib.calloc( + 1, sizeof(PyCustomSlots_Table) + sizeof(uint16_t) * nbins + + sizeof(PyCustomSlots_Entry) * size) + + if table == NULL: + raise MemoryError + + table.n = size + table.b = nbins + table.flags = 0 + + assert table.b >= table.n, (table.b, table.n, nbins) + + table.entries = ( + ( table) + sizeof(PyCustomSlots_Table) + + nbins * sizeof(uint16_t)) + + return table + +def make_bytes(s): + if isinstance(s, str): + # Python 3 + s = s.encode("ascii") + + return s + +cdef class Hasher(object): + """ + Generate a globally unique hashes for signature strings. + """ + + def hash_signature(self, signature): + cdef uint64_t hashvalue + # cdef bytes md5 = hashlib.md5(signature).digest() + # (&hashvalue)[0] = ( md5)[0] + + hashvalue = intern.global_intern(make_bytes(signature)) + return hashvalue + + +cdef class PerfectHashMethodTable(object): + """ + Simple wrapper for hash-based virtual method tables. + """ + + cdef PyCustomSlots_Table *table + cdef uint16_t *displacements + cdef Hasher hasher + + cdef object id_to_signature, signatures + + def __init__(self, hasher): + self.hasher = hasher + # For debugging + self.id_to_signature = {} + + def generate_table(self, n, ids, flags, funcs, method_names=None): + cdef Py_ssize_t i + cdef cnp.ndarray[uint64_t] hashes + + self.table = allocate_hash_table(n) + self.displacements = ( self.table + + sizeof(PyCustomSlots_Table)) + + hashes = np.zeros(self.table.n, dtype=np.uint64) + + intern.global_intern_initialize() + + # Initialize hash table entries, build hash ids + assert len(ids) == len(flags) == len(funcs) + + for i, (signature, flag, func) in enumerate(zip(ids, flags, funcs)): + id = self.hasher.hash_signature(signature) + + self.table.entries[i].id = id + self.table.entries[i].ptr = func + + hashes[i] = id + self.id_to_signature[id] = signature + + + hashes[n:self.table.n] = extensibletype.draw_hashes(np.random, + self.table.n - n) + # print "n", n, "table.n", self.table.n, "table.b", self.table.b + assert len(np.unique(hashes)) == len(hashes) + + # print "-----------------------" + # print self + # print "-----------------------" + + assert self.table.b >= self.table.n, (self.table.b, self.table.n) + + # Perfect hash our table + if PyCustomSlots_PerfectHash(self.table, &hashes[0]) < 0: + # TODO: sensible error messages + raise HashingError( + "Unable to create perfect hash table for table: %s" % self) + + for i, signature in enumerate(ids): + assert self.find_method(signature) is not None, (i, signature) + + # For debugging + self.signatures = ids + + def find_method(self, signature): + """ + Find method of the given signature. Use from non-performance + critical code. + """ + cdef uint64_t prehash = intern.global_intern(make_bytes(signature)) + + assert 0 <= self.displacements[prehash & self.table.m_g] < self.table.b + cdef uint64_t idx = (((prehash >> self.table.r) & self.table.m_f) ^ + self.displacements[prehash & self.table.m_g]) + + assert 0 <= idx < self.size, (idx, self.size) + + if self.table.entries[idx].id != prehash: + return None + + return ( self.table.entries[idx].ptr, + self.table.entries[idx].id & 0xFF) + + def __str__(self): + buf = ["PerfectHashMethodTable("] + for i in range(self.table.n): + id = self.table.entries[i].id + ptr = self.table.entries[i].ptr + sig = self.id_to_signature.get(id, "") + s = " id: 0x%-16x funcptr: %20d signature: %s" % (id, ptr, sig) + buf.append(s) + + buf.append(")") + + return "\n".join(buf) + + def __dealloc__(self): + # stdlib.free(self.table) + # self.table = NULL + pass + + property table_ptr: + def __get__(self): + return self.table + + property size: + def __get__(self): + return self.table.n + diff --git a/extensibletype/test/pstdint.pyx b/extensibletype/test/pstdint.pyx new file mode 100644 index 0000000..ec59c80 --- /dev/null +++ b/extensibletype/test/pstdint.pyx @@ -0,0 +1,21 @@ +cdef extern from "pstdint.h": + ctypedef int int8_t + ctypedef int int16_t + ctypedef int int32_t + ctypedef int int64_t + + ctypedef int uint8_t + ctypedef int uint16_t + ctypedef int uint32_t + ctypedef int uint64_t + + ctypedef int intptr_t + ctypedef int uintptr_t + +def test_pstdint(): + assert sizeof(int8_t) == sizeof(uint8_t) == 1 + assert sizeof(int16_t) == sizeof(uint16_t) == 2 + assert sizeof(int32_t) == sizeof(uint32_t) == 4 + assert sizeof(int64_t) == sizeof(uint64_t) == 8 + + assert sizeof(intptr_t) == sizeof(uintptr_t) >= sizeof(void *) diff --git a/extensibletype/test/test_interning.py b/extensibletype/test/test_interning.py new file mode 100644 index 0000000..8968193 --- /dev/null +++ b/extensibletype/test/test_interning.py @@ -0,0 +1,43 @@ +from .. import intern + +def test_global_interning(): + # Can't really test for this with nose... + # try: + # intern.global_intern("hello") + # except AssertionError as e: + # pass + # else: + # raise Exception("Expects complaint about uninitialized table") + + intern.global_intern_initialize() + id1 = intern.global_intern("hello") + id2 = intern.global_intern("hello") + id3 = intern.global_intern("hallo") + assert id1 == id2 + assert id1 != id3 + +def test_interning(): + table = intern.InternTable() + + id1 = intern.global_intern("hello") + id2 = intern.global_intern("hello") + id3 = intern.global_intern("hallo") + assert id1 == id2 + assert id1 != id3 + +def test_intern_many(): + table = intern.InternTable() + + itoid = {} + for i in range(1000000): + id = table.intern("my randrom string %d" % i) + itoid[i] = id + + id1 = table.intern("my randrom string %d" % (i // 2)) + id2 = table.intern("my randrom string %d" % (i // 4)) + + assert id1 == itoid[i//2] + assert id2 == itoid[i//4] + +if __name__ == '__main__': + test_intern_many() \ No newline at end of file diff --git a/extensibletype/test/test_perfecthashing.py b/extensibletype/test/test_perfecthashing.py index c8a11aa..2dc4499 100644 --- a/extensibletype/test/test_perfecthashing.py +++ b/extensibletype/test/test_perfecthashing.py @@ -1,42 +1,74 @@ +import time +import itertools + from nose.tools import eq_, ok_ import numpy as np -from .. import extensibletype - -def draw_hashes(rng, nitems): - hashes = rng.randint(2**32, size=nitems).astype(np.uint64) - hashes <<= 32 - hashes |= rng.randint(2**32, size=nitems).astype(np.uint64) - return hashes - -def roundup(x): - x -= 1 - x |= x >> 1 - x |= x >> 2 - x |= x >> 4 - x |= x >> 8 - x |= x >> 16 - x |= x >> 32 - x += 1 - return x +from .. import extensibletype, methodtable + def test_binsort(): nbins = 64 + p = np.zeros(nbins, dtype=np.uint16) binsizes = np.random.randint(0, 7, size=nbins).astype(np.uint8) - num_by_size = np.zeros(8, dtype=np.uint8) - x = np.bincount(binsizes).astype(np.uint8) + + num_by_size = np.zeros(8, dtype=np.uint16) + x = np.bincount(binsizes).astype(np.uint16) + num_by_size[:x.shape[0]] = x extensibletype.bucket_argsort(p, binsizes, num_by_size) assert np.all(sorted(binsizes) == binsizes[p][::-1]) def test_basic(): n=64 - prehashes = draw_hashes(np.random, n) - p, r, m_f, m_g, d = extensibletype.perfect_hash(prehashes, repeat=10**6) + prehashes = extensibletype.draw_hashes(np.random, n) + assert len(prehashes) == len(set(prehashes)) + p, r, m_f, m_g, d = extensibletype.perfect_hash(prehashes, repeat=10**5) hashes = ((prehashes >> r) & m_f) ^ d[prehashes & m_g] - print p - print d + print(p) + print(d) hashes.sort() - print hashes + print(hashes) assert len(hashes) == len(np.unique(hashes)) - + +# --- +# Test methodtable + +def make_signature(type_permutation): + return "".join(type_permutation[:-1]) + '->' + type_permutation[-1] + +def make_ids(): + types = ['f', 'd', 'i', 'l', 'O'] + power = 5 + return map(make_signature, itertools.product(*(types,) * power)) + +def build_and_verify_methodtable(ids, flags, funcs): + table = methodtable.PerfectHashMethodTable(methodtable.Hasher()) + table.generate_table(len(ids), ids, flags, funcs) + + for (signature, flag, func) in zip(ids, flags, funcs): + result = table.find_method(signature) + assert result is not None + + got_func, got_flag = result + assert func == got_func, (func, got_func) + # assert flag == got_flag, (flag, got_flag) + +def test_methodtable(): + # ids = ["ff->f", "dd->d", "ii->i", "ll->l", "OO->O"] + + ids = make_ids() + flags = range(1, len(ids) + 1) + funcs = range(len(ids)) + + step = 100 + + i = len(ids) + for i in range(1, len(ids), step): + t = time.time() + build_and_verify_methodtable(ids[:i], flags[:i], funcs[:i]) + t = time.time() - t + print i, "table building took", t, "seconds." + +if __name__ == '__main__': + test_methodtable() \ No newline at end of file diff --git a/extensibletype/test/test_pstdint.py b/extensibletype/test/test_pstdint.py new file mode 100644 index 0000000..45edb3d --- /dev/null +++ b/extensibletype/test/test_pstdint.py @@ -0,0 +1,4 @@ +from . import pstdint + +def test_pstdint(): + pstdint.test_pstdint() diff --git a/include/customslots.h b/include/customslots.h index b86a4da..2ec3d33 100644 --- a/include/customslots.h +++ b/include/customslots.h @@ -6,11 +6,20 @@ extern "C" { #include #include -#include -/* Some stdint.h implementations: -Portable: http://www.azillionmonkeys.com/qed/pstdint.h -MSVC: http://msinttypes.googlecode.com/svn/trunk/stdint.h + +/* +Make this work by default on all platforms using pstdint: + + Portable: http://www.azillionmonkeys.com/qed/pstdint.h + +There is also: + + MSVC: http://msinttypes.googlecode.com/svn/trunk/stdint.h + +We could conditionally include it, but we would need to use something +like autoconf... */ +#include #if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 95)) #define PY_CUSTOMSLOTS_LIKELY(x) __builtin_expect(!!(x), 1) diff --git a/include/globalinterning.h b/include/globalinterning.h new file mode 100644 index 0000000..438f79f --- /dev/null +++ b/include/globalinterning.h @@ -0,0 +1,114 @@ +#ifndef Py_GLOBAL_INTERN_H +#define Py_GLOBAL_INTERN_H_ +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include "interning.h" + +static const char *_table_name = "_global_table_v1"; +static intern_table_t _global_intern_table; +static intern_table_t *_global_table = NULL; + +/* Interning API */ + +/* Uses functions so we can get the address (and make it + accessible from FFIs) */ + +/* Get a unique prehash for a signature string. + Returns 0 on error with an exception set ('except? 0'). */ +static uint64_t +PyIntern_AddKey(const char *key) +{ + if (_global_table == NULL) { + PyErr_SetString(PyExc_AssertionError, + "Intern table not set, did you call PyIntern_Initialize()?"); + return 0; + } + + return intern_key(_global_table, key); +} + +static PyObject * +capsule_create(void *p, const char *sig) +{ + PyObject *capsule; + +#if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 0) + capsule = PyCapsule_New(p, sig, NULL); +#else + capsule = PyCObject_FromVoidPtr(p, NULL); +#endif + + return capsule; +} + +static void * +capsule_getpointer(PyObject *capsule, const char *sig) +{ + void *cobj; + +#if PY_VERSION_HEX >= 0x02070000 && !(PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 0) + cobj = PyCapsule_GetPointer(capsule, sig); +#else + cobj = PyCObject_AsVoidPtr(capsule); +#endif + + return cobj; +} + +/* Intialize global interning table */ +static int +PyIntern_Initialize(void) { + PyObject *module = NULL; + intern_table_t *table = NULL; + PyObject *capsule = NULL; + int retval; + + if (_global_table != NULL) { + return 0; + } + + module = PyImport_AddModule("_global_interning"); /* borrowed ref */ + if (!module) + goto bad; + + if (PyObject_HasAttrString(module, _table_name)) { + capsule = PyObject_GetAttrString(module, _table_name); + if (!capsule) + goto bad; + + table = capsule_getpointer(capsule, "_intern_table"); + if (!table) + goto bad; + } else { + /* not found; create it */ + table = intern_create_table(&_global_intern_table); + if (table == NULL) + goto bad; + + capsule = capsule_create(table, "_intern_table"); + if (PyObject_SetAttrString(module, _table_name, capsule) < 0) + goto bad; + } + + /* Initialize the global variable used in macros */ + _global_table = table; + + retval = 0; + goto ret; +bad: + retval = -1; +ret: + /* module is borrowed */ + Py_XDECREF(capsule); + return retval; +} + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_GLOBAL_INTERN_H */ diff --git a/include/interning.h b/include/interning.h new file mode 100644 index 0000000..e7dd656 --- /dev/null +++ b/include/interning.h @@ -0,0 +1,258 @@ +#ifndef Py_INTERNING_H +#define Py_INTERNING_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Utility for interning strings */ +/* TODO: make it GIL-less and Python independent */ + +#include +#include +#include "pstdint.h" +#include "siphash24.h" + +#if PY_MAJOR_VERSION < 3 + #define _PyIntern_FromString PyString_FromString + #define _PyIntern_FromStringAndSize PyString_FromStringAndSize + #define _PyIntern_AsString PyString_AsString + #define _PyIntern_Size PyString_Size +#else + #define _PyIntern_FromString PyBytes_FromString + #define _PyIntern_FromStringAndSize PyBytes_FromStringAndSize + #define _PyIntern_AsString PyBytes_AsString + #define _PyIntern_Size PyBytes_Size +#endif + +/* Data types */ + +typedef struct _intern_table_t { + PyObject *signature_to_key; + PyObject *key_to_signature; + char secrets[16*4]; /* 4 secret keys, which we try in succession */ +} intern_table_t; + +/* Prototypes */ +static void intern_destroy_table(intern_table_t *table); + +/* API */ + +static void +_print_secrets(intern_table_t *table) +{ + int i, j; + + for (i = 0; i < 4; i++) { + printf("secret key[%d] = {", i); + for (j = 0; j < 16; j += 4) { + printf(" %-8x, ", *(int32_t *) &table->secrets[i * 16 + j]); + } + printf("}\n"); + } +} + +/* Create an intern table from preallocated memory. + Returns NULL on failure with an appropriate exception set. */ +static intern_table_t * +intern_create_table(intern_table_t *table) +{ + int i; + + table->signature_to_key = NULL; + table->key_to_signature = NULL; + + table->signature_to_key = PyDict_New(); + table->key_to_signature = PyDict_New(); + + if (!table->signature_to_key || !table->key_to_signature) + goto bad; + + for (i = 0; i < 16 * 4; i += 2) { + /* Take the lower two bytes from the random value, since + RAND_MAX is at least 2**16 */ + short randval = (short) rand(); /* TODO: use a better prng */ + + table->secrets[i + 0] = ((char *) &randval)[0]; + table->secrets[i + 1] = ((char *) &randval)[1]; + } + /* Amend this! */ + memset(&table->secrets[16*0], 0, 16); + memset(&table->secrets[16*1], 1, 16); + memset(&table->secrets[16*2], 2, 16); + memset(&table->secrets[16*3], 3, 16); + + /* _print_secrets(table); */ + + return table; +bad: + intern_destroy_table(table); + return NULL; +} + +static void +intern_destroy_table(intern_table_t *table) +{ + Py_CLEAR(table->signature_to_key); + Py_CLEAR(table->key_to_signature); +} + +/* + Update table with a prehash candidate. + + Returns -1 on error, 0 on duplicate prehash, 1 on success. + */ +static int +update_table(intern_table_t *table, PyObject *key_obj, uint64_t prehash) +{ + PyObject *value; + int retcode; + int result; + + /* TODO: Py_LONG_LONG may not be 64 bits... */ + #if PY_ULLONG_MAX < 0xffffffffffffffffULL + #error "sizeof(unsigned PY_LONG_LONG) must be at least 8 bytes" + #endif + + value = PyLong_FromUnsignedLongLong(prehash); + if (!value) + goto bad; + + /* See whether we already have this hash for a different signature string */ + result = PyDict_Contains(table->key_to_signature, value); + if (result != 0) { + if (result == -1) + goto bad; + else + goto duplicate; + } + + if (PyDict_SetItem(table->signature_to_key, key_obj, value) < 0) + goto bad; + + if (PyDict_SetItem(table->key_to_signature, value, key_obj) < 0) { + PyDict_DelItem(table->signature_to_key, key_obj); + goto bad; + } + + retcode = 1; + goto done; + +bad: + retcode = -1; + +duplicate: + retcode = 0; + +done: + Py_XDECREF(value); + return retcode; +} + +/* Build prehash using siphash given the signature string and a secret key */ +static uint64_t +_intern_build_key(PyObject *key_obj, const char *key, const char *secret) +{ + Py_ssize_t len = _PyIntern_Size(key_obj); + uint64_t prehash; + (void) crypto_auth((unsigned char *) &prehash, + (const unsigned char *) key, + len, + (const unsigned char *) secret); + return prehash; +} + +/* Make a prehash for a signature string, trying different secret keys in + succession. */ +static int +make_prehash(intern_table_t *table, PyObject *key_obj, const char *key, + uint64_t *prehash_out) +{ + const char *secret = table->secrets; + int tries = 0; + uint64_t prehash; + int result; + + while (1) { + int result; + prehash = _intern_build_key(key_obj, key, secret); + result = update_table(table, key_obj, prehash); + if (result < 0) { + goto bad; + } else if (result == 0) { + /* Duplicate, keep going */ + secret += 16; + if (++tries == 4) { + PyErr_SetString(PyExc_ValueError, + "Failed to create unique prehash"); + goto bad; + } + } else { + /* We have a unique prehash */ + break; + } + } + + *prehash_out = prehash; + return 0; +bad: + return -1; +} + +static uint64_t +_intern_key(intern_table_t *table, PyObject *key_obj, const char *key) +{ + PyObject *value; + PyObject *tmp = NULL; + uint64_t prehash; + + value = PyDict_GetItem(table->signature_to_key, key_obj); + + if (value == NULL) { + /* Key not in dict */ + Py_INCREF(key_obj); + if (make_prehash(table, key_obj, key, &prehash) < 0) + goto bad; + } else { + prehash = PyLong_AsUnsignedLongLong(value); + if (PyErr_Occurred()) + goto bad; + } + + goto done; + +bad: + prehash = 0; + +done: + Py_XDECREF(tmp); + return prehash; +} + +/* + + Intern a signature string and return a unique prehash, to be used to + compute the final hash in a perfect hashing vtable. + + Callers should check for errors using PyErr_Occurred() when this function + returns 0. +*/ +static uint64_t +intern_key(intern_table_t *table, const char *key) +{ + PyObject *key_obj = _PyIntern_FromString(key); + uint64_t retval; + + if (key_obj == NULL) + return 0; + + retval = _intern_key(table, key_obj, key); + + Py_DECREF(key_obj); + return retval; +} + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNING_H */ diff --git a/include/perfecthash.h b/include/perfecthash.h index aa4e042..c500c06 100644 --- a/include/perfecthash.h +++ b/include/perfecthash.h @@ -1,9 +1,8 @@ -#include #include +#include "pstdint.h" typedef struct { - char *id; - uintptr_t flags; + uint64_t id; void *ptr; } PyCustomSlots_Entry; @@ -41,11 +40,12 @@ uint64_t PyCustomSlots_roundup_2pow(uint64_t x) { #define BIN_LIMIT 8 - + void _PyCustomSlots_bucket_argsort(uint16_t *p, uint8_t *binsizes, - uint8_t *number_of_bins_by_size) { + uint16_t *number_of_bins_by_size) { uint16_t *sort_bins[BIN_LIMIT]; - int binsize, ibin, nbins; + int binsize; + uint32_t ibin, nbins; nbins = 0; /* We know how many bins there are of each size, so place pointers for each size along on the output array p */ @@ -71,9 +71,10 @@ int _PyCustomSlots_FindDisplacements(PyCustomSlots_Table *table, PyCustomSlots_Entry *entries_copy) { uint16_t *d = (void*)((char*)table + sizeof(PyCustomSlots_Table)); uint16_t nbins = table->b; + uint16_t n = table->n; uint64_t m_f = table->m_f; uint8_t r = table->r; - int i, j, bin; + uint16_t i, j, bin; /* Step 1: Validate that f is 1:1 in each bin */ for (j = 0; j != nbins; ++j) { @@ -91,16 +92,17 @@ int _PyCustomSlots_FindDisplacements(PyCustomSlots_Table *table, /* Step 2: Attempt to assign displacements d[bin], starting with the largest bin */ - for (i = 0; i != nbins; ++i) { + for (i = 0; i != n; ++i) { taken[i] = 0; } + for (j = 0; j != nbins; ++j) { uint16_t dval; bin = p[j]; if (binsizes[bin] == 0) { d[bin] = 0; } else { - for (dval = 0; dval != nbins; ++dval) { + for (dval = 0; dval != n; ++dval) { int k; int collides = 0; for (k = 0; k != binsizes[bin]; ++k) { @@ -113,7 +115,7 @@ int _PyCustomSlots_FindDisplacements(PyCustomSlots_Table *table, } if (!collides) break; } - if (dval == nbins) { + if (dval == n) { /* no appropriate dval found */ return -1; } else { @@ -134,7 +136,8 @@ int _PyCustomSlots_FindDisplacements(PyCustomSlots_Table *table, } int PyCustomSlots_PerfectHash(PyCustomSlots_Table *table, uint64_t *hashes) { - uint16_t bin, j; + int result, r, retcode; + uint32_t bin, j; uint8_t binsize; uint16_t i, n = table->n, b = table->b; uint64_t m_f = PyCustomSlots_roundup_2pow(table->n) - 1; @@ -143,13 +146,18 @@ int PyCustomSlots_PerfectHash(PyCustomSlots_Table *table, uint64_t *hashes) { uint8_t *binsizes = malloc(sizeof(uint8_t) * b); uint16_t *p = malloc(sizeof(uint16_t) * b); uint8_t *taken = malloc(sizeof(uint8_t) * n); - uint8_t number_of_bins_by_size[BIN_LIMIT]; + uint16_t number_of_bins_by_size[BIN_LIMIT]; PyCustomSlots_Entry *entries_copy = malloc(sizeof(PyCustomSlots_Entry) * n); + if (!bins || !binsizes || !p || !taken || !entries_copy) { + printf("Error: Unable to allocate memory\n"); + goto error; + } + for (i = 0; i != n; ++i) { entries_copy[i] = table->entries[i]; } - + /* Bin the n hashes into b bins based on the g hash. Also count the number of bins of each size. */ for (bin = 0; bin != b; ++bin) { @@ -161,10 +169,12 @@ int PyCustomSlots_PerfectHash(PyCustomSlots_Table *table, uint64_t *hashes) { } for (i = 0; i != n; ++i) { bin = hashes[i] & m_g; + if (bin > b) + abort(); binsize = ++binsizes[bin]; if (binsize == BIN_LIMIT) { - printf("ERROR 1\n"); - return -1; + printf("Error: Bin limit reached\n"); + goto error; } bins[BIN_LIMIT * bin + binsize - 1] = i; number_of_bins_by_size[binsize - 1]--; @@ -174,11 +184,23 @@ int PyCustomSlots_PerfectHash(PyCustomSlots_Table *table, uint64_t *hashes) { /* argsort the bins (p stores permutation) from largest to smallest, using binsort */ _PyCustomSlots_bucket_argsort(p, binsizes, &number_of_bins_by_size[0]); + /* + for (i = 0; i < BIN_LIMIT; i++) { + printf("bin_by_size[%d] = %d\n", i, number_of_bins_by_size[i]); + } + */ + + /* Sanity check */ + for (i = 0; i < b; ++i) { + if (!(p[i] < b)) { + printf("ERROR: p[%d]=%d\n", i, p[i]); + abort(); + } + } /* Find perfect table -- try again for each choice of r */ table->m_f = m_f; table->m_g = m_g; - int r, retcode; for (r = 64; r != -1; --r) { table->r = r; retcode = _PyCustomSlots_FindDisplacements(table, hashes, binsizes, bins, p, @@ -188,6 +210,20 @@ int PyCustomSlots_PerfectHash(PyCustomSlots_Table *table, uint64_t *hashes) { } } + if (retcode != 0) { + printf("Error: No suitable table found\n"); + goto error; + } + + result = 0; + goto cleanup; + +error: + + result = -1; + +cleanup: + /*TODO does not free on error... */ free(bins); free(binsizes); @@ -195,5 +231,5 @@ int PyCustomSlots_PerfectHash(PyCustomSlots_Table *table, uint64_t *hashes) { free(taken); free(entries_copy); - return 0; + return result; } diff --git a/include/pstdint.h b/include/pstdint.h new file mode 100644 index 0000000..2e4dbff --- /dev/null +++ b/include/pstdint.h @@ -0,0 +1,800 @@ +/* A portable stdint.h + **************************************************************************** + * BSD License: + **************************************************************************** + * + * Copyright (c) 2005-2011 Paul Hsieh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************** + * + * Version 0.1.12 + * + * The ANSI C standard committee, for the C99 standard, specified the + * inclusion of a new standard include file called stdint.h. This is + * a very useful and long desired include file which contains several + * very precise definitions for integer scalar types that is + * critically important for making portable several classes of + * applications including cryptography, hashing, variable length + * integer libraries and so on. But for most developers its likely + * useful just for programming sanity. + * + * The problem is that most compiler vendors have decided not to + * implement the C99 standard, and the next C++ language standard + * (which has a lot more mindshare these days) will be a long time in + * coming and its unknown whether or not it will include stdint.h or + * how much adoption it will have. Either way, it will be a long time + * before all compilers come with a stdint.h and it also does nothing + * for the extremely large number of compilers available today which + * do not include this file, or anything comparable to it. + * + * So that's what this file is all about. Its an attempt to build a + * single universal include file that works on as many platforms as + * possible to deliver what stdint.h is supposed to. A few things + * that should be noted about this file: + * + * 1) It is not guaranteed to be portable and/or present an identical + * interface on all platforms. The extreme variability of the + * ANSI C standard makes this an impossibility right from the + * very get go. Its really only meant to be useful for the vast + * majority of platforms that possess the capability of + * implementing usefully and precisely defined, standard sized + * integer scalars. Systems which are not intrinsically 2s + * complement may produce invalid constants. + * + * 2) There is an unavoidable use of non-reserved symbols. + * + * 3) Other standard include files are invoked. + * + * 4) This file may come in conflict with future platforms that do + * include stdint.h. The hope is that one or the other can be + * used with no real difference. + * + * 5) In the current verison, if your platform can't represent + * int32_t, int16_t and int8_t, it just dumps out with a compiler + * error. + * + * 6) 64 bit integers may or may not be defined. Test for their + * presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX. + * Note that this is different from the C99 specification which + * requires the existence of 64 bit support in the compiler. If + * this is not defined for your platform, yet it is capable of + * dealing with 64 bits then it is because this file has not yet + * been extended to cover all of your system's capabilities. + * + * 7) (u)intptr_t may or may not be defined. Test for its presence + * with the test: #ifdef PTRDIFF_MAX. If this is not defined + * for your platform, then it is because this file has not yet + * been extended to cover all of your system's capabilities, not + * because its optional. + * + * 8) The following might not been defined even if your platform is + * capable of defining it: + * + * WCHAR_MIN + * WCHAR_MAX + * (u)int64_t + * PTRDIFF_MIN + * PTRDIFF_MAX + * (u)intptr_t + * + * 9) The following have not been defined: + * + * WINT_MIN + * WINT_MAX + * + * 10) The criteria for defining (u)int_least(*)_t isn't clear, + * except for systems which don't have a type that precisely + * defined 8, 16, or 32 bit types (which this include file does + * not support anyways). Default definitions have been given. + * + * 11) The criteria for defining (u)int_fast(*)_t isn't something I + * would trust to any particular compiler vendor or the ANSI C + * committee. It is well known that "compatible systems" are + * commonly created that have very different performance + * characteristics from the systems they are compatible with, + * especially those whose vendors make both the compiler and the + * system. Default definitions have been given, but its strongly + * recommended that users never use these definitions for any + * reason (they do *NOT* deliver any serious guarantee of + * improved performance -- not in this file, nor any vendor's + * stdint.h). + * + * 12) The following macros: + * + * PRINTF_INTMAX_MODIFIER + * PRINTF_INT64_MODIFIER + * PRINTF_INT32_MODIFIER + * PRINTF_INT16_MODIFIER + * PRINTF_LEAST64_MODIFIER + * PRINTF_LEAST32_MODIFIER + * PRINTF_LEAST16_MODIFIER + * PRINTF_INTPTR_MODIFIER + * + * are strings which have been defined as the modifiers required + * for the "d", "u" and "x" printf formats to correctly output + * (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t, + * (u)least32_t, (u)least16_t and (u)intptr_t types respectively. + * PRINTF_INTPTR_MODIFIER is not defined for some systems which + * provide their own stdint.h. PRINTF_INT64_MODIFIER is not + * defined if INT64_MAX is not defined. These are an extension + * beyond what C99 specifies must be in stdint.h. + * + * In addition, the following macros are defined: + * + * PRINTF_INTMAX_HEX_WIDTH + * PRINTF_INT64_HEX_WIDTH + * PRINTF_INT32_HEX_WIDTH + * PRINTF_INT16_HEX_WIDTH + * PRINTF_INT8_HEX_WIDTH + * PRINTF_INTMAX_DEC_WIDTH + * PRINTF_INT64_DEC_WIDTH + * PRINTF_INT32_DEC_WIDTH + * PRINTF_INT16_DEC_WIDTH + * PRINTF_INT8_DEC_WIDTH + * + * Which specifies the maximum number of characters required to + * print the number of that type in either hexadecimal or decimal. + * These are an extension beyond what C99 specifies must be in + * stdint.h. + * + * Compilers tested (all with 0 warnings at their highest respective + * settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32 + * bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio + * .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3 + * + * This file should be considered a work in progress. Suggestions for + * improvements, especially those which increase coverage are strongly + * encouraged. + * + * Acknowledgements + * + * The following people have made significant contributions to the + * development and testing of this file: + * + * Chris Howie + * John Steele Scott + * Dave Thorup + * John Dill + * + */ + +#include +#include +#include + +/* + * For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and + * do nothing else. On the Mac OS X version of gcc this is _STDINT_H_. + */ + +#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED) +#include +#define _PSTDINT_H_INCLUDED +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER +# endif +# ifndef PRINTF_INT64_HEX_WIDTH +# define PRINTF_INT64_HEX_WIDTH "16" +# endif +# ifndef PRINTF_INT32_HEX_WIDTH +# define PRINTF_INT32_HEX_WIDTH "8" +# endif +# ifndef PRINTF_INT16_HEX_WIDTH +# define PRINTF_INT16_HEX_WIDTH "4" +# endif +# ifndef PRINTF_INT8_HEX_WIDTH +# define PRINTF_INT8_HEX_WIDTH "2" +# endif +# ifndef PRINTF_INT64_DEC_WIDTH +# define PRINTF_INT64_DEC_WIDTH "20" +# endif +# ifndef PRINTF_INT32_DEC_WIDTH +# define PRINTF_INT32_DEC_WIDTH "10" +# endif +# ifndef PRINTF_INT16_DEC_WIDTH +# define PRINTF_INT16_DEC_WIDTH "5" +# endif +# ifndef PRINTF_INT8_DEC_WIDTH +# define PRINTF_INT8_DEC_WIDTH "3" +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH +# endif + +/* + * Something really weird is going on with Open Watcom. Just pull some of + * these duplicated definitions from Open Watcom's stdint.h file for now. + */ + +# if defined (__WATCOMC__) && __WATCOMC__ >= 1250 +# if !defined (INT64_C) +# define INT64_C(x) (x + (INT64_MAX - INT64_MAX)) +# endif +# if !defined (UINT64_C) +# define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) +# endif +# if !defined (INT32_C) +# define INT32_C(x) (x + (INT32_MAX - INT32_MAX)) +# endif +# if !defined (UINT32_C) +# define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX)) +# endif +# if !defined (INT16_C) +# define INT16_C(x) (x) +# endif +# if !defined (UINT16_C) +# define UINT16_C(x) (x) +# endif +# if !defined (INT8_C) +# define INT8_C(x) (x) +# endif +# if !defined (UINT8_C) +# define UINT8_C(x) (x) +# endif +# if !defined (UINT64_MAX) +# define UINT64_MAX 18446744073709551615ULL +# endif +# if !defined (INT64_MAX) +# define INT64_MAX 9223372036854775807LL +# endif +# if !defined (UINT32_MAX) +# define UINT32_MAX 4294967295UL +# endif +# if !defined (INT32_MAX) +# define INT32_MAX 2147483647L +# endif +# if !defined (INTMAX_MAX) +# define INTMAX_MAX INT64_MAX +# endif +# if !defined (INTMAX_MIN) +# define INTMAX_MIN INT64_MIN +# endif +# endif +#endif + +#ifndef _PSTDINT_H_INCLUDED +#define _PSTDINT_H_INCLUDED + +#ifndef SIZE_MAX +# define SIZE_MAX (~(size_t)0) +#endif + +/* + * Deduce the type assignments from limits.h under the assumption that + * integer sizes in bits are powers of 2, and follow the ANSI + * definitions. + */ + +#ifndef UINT8_MAX +# define UINT8_MAX 0xff +#endif +#ifndef uint8_t +# if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S) + typedef unsigned char uint8_t; +# define UINT8_C(v) ((uint8_t) v) +# else +# error "Platform not supported" +# endif +#endif + +#ifndef INT8_MAX +# define INT8_MAX 0x7f +#endif +#ifndef INT8_MIN +# define INT8_MIN INT8_C(0x80) +#endif +#ifndef int8_t +# if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S) + typedef signed char int8_t; +# define INT8_C(v) ((int8_t) v) +# else +# error "Platform not supported" +# endif +#endif + +#ifndef UINT16_MAX +# define UINT16_MAX 0xffff +#endif +#ifndef uint16_t +#if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S) + typedef unsigned int uint16_t; +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "" +# endif +# define UINT16_C(v) ((uint16_t) (v)) +#elif (USHRT_MAX == UINT16_MAX) + typedef unsigned short uint16_t; +# define UINT16_C(v) ((uint16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef INT16_MAX +# define INT16_MAX 0x7fff +#endif +#ifndef INT16_MIN +# define INT16_MIN INT16_C(0x8000) +#endif +#ifndef int16_t +#if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S) + typedef signed int int16_t; +# define INT16_C(v) ((int16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "" +# endif +#elif (SHRT_MAX == INT16_MAX) + typedef signed short int16_t; +# define INT16_C(v) ((int16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef UINT32_MAX +# define UINT32_MAX (0xffffffffUL) +#endif +#ifndef uint32_t +#if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S) + typedef unsigned long uint32_t; +# define UINT32_C(v) v ## UL +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +#elif (UINT_MAX == UINT32_MAX) + typedef unsigned int uint32_t; +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +# define UINT32_C(v) v ## U +#elif (USHRT_MAX == UINT32_MAX) + typedef unsigned short uint32_t; +# define UINT32_C(v) ((unsigned short) (v)) +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef INT32_MAX +# define INT32_MAX (0x7fffffffL) +#endif +#ifndef INT32_MIN +# define INT32_MIN INT32_C(0x80000000) +#endif +#ifndef int32_t +#if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S) + typedef signed long int32_t; +# define INT32_C(v) v ## L +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +#elif (INT_MAX == INT32_MAX) + typedef signed int int32_t; +# define INT32_C(v) v +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#elif (SHRT_MAX == INT32_MAX) + typedef signed short int32_t; +# define INT32_C(v) ((short) (v)) +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#else +#error "Platform not supported" +#endif +#endif + +/* + * The macro stdint_int64_defined is temporarily used to record + * whether or not 64 integer support is available. It must be + * defined for any 64 integer extensions for new platforms that are + * added. + */ + +#undef stdint_int64_defined +#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S) +# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S) +# define stdint_int64_defined + typedef long long int64_t; + typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# endif +#endif + +#if !defined (stdint_int64_defined) +# if defined(__GNUC__) +# define stdint_int64_defined + __extension__ typedef long long int64_t; + __extension__ typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S) +# define stdint_int64_defined + typedef long long int64_t; + typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC) +# define stdint_int64_defined + typedef __int64 int64_t; + typedef unsigned __int64 uint64_t; +# define UINT64_C(v) v ## UI64 +# define INT64_C(v) v ## I64 +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "I64" +# endif +# endif +#endif + +#if !defined (LONG_LONG_MAX) && defined (INT64_C) +# define LONG_LONG_MAX INT64_C (9223372036854775807) +#endif +#ifndef ULONG_LONG_MAX +# define ULONG_LONG_MAX UINT64_C (18446744073709551615) +#endif + +#if !defined (INT64_MAX) && defined (INT64_C) +# define INT64_MAX INT64_C (9223372036854775807) +#endif +#if !defined (INT64_MIN) && defined (INT64_C) +# define INT64_MIN INT64_C (-9223372036854775808) +#endif +#if !defined (UINT64_MAX) && defined (INT64_C) +# define UINT64_MAX UINT64_C (18446744073709551615) +#endif + +/* + * Width of hexadecimal for number field. + */ + +#ifndef PRINTF_INT64_HEX_WIDTH +# define PRINTF_INT64_HEX_WIDTH "16" +#endif +#ifndef PRINTF_INT32_HEX_WIDTH +# define PRINTF_INT32_HEX_WIDTH "8" +#endif +#ifndef PRINTF_INT16_HEX_WIDTH +# define PRINTF_INT16_HEX_WIDTH "4" +#endif +#ifndef PRINTF_INT8_HEX_WIDTH +# define PRINTF_INT8_HEX_WIDTH "2" +#endif + +#ifndef PRINTF_INT64_DEC_WIDTH +# define PRINTF_INT64_DEC_WIDTH "20" +#endif +#ifndef PRINTF_INT32_DEC_WIDTH +# define PRINTF_INT32_DEC_WIDTH "10" +#endif +#ifndef PRINTF_INT16_DEC_WIDTH +# define PRINTF_INT16_DEC_WIDTH "5" +#endif +#ifndef PRINTF_INT8_DEC_WIDTH +# define PRINTF_INT8_DEC_WIDTH "3" +#endif + +/* + * Ok, lets not worry about 128 bit integers for now. Moore's law says + * we don't need to worry about that until about 2040 at which point + * we'll have bigger things to worry about. + */ + +#ifdef stdint_int64_defined + typedef int64_t intmax_t; + typedef uint64_t uintmax_t; +# define INTMAX_MAX INT64_MAX +# define INTMAX_MIN INT64_MIN +# define UINTMAX_MAX UINT64_MAX +# define UINTMAX_C(v) UINT64_C(v) +# define INTMAX_C(v) INT64_C(v) +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH +# endif +#else + typedef int32_t intmax_t; + typedef uint32_t uintmax_t; +# define INTMAX_MAX INT32_MAX +# define UINTMAX_MAX UINT32_MAX +# define UINTMAX_C(v) UINT32_C(v) +# define INTMAX_C(v) INT32_C(v) +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH +# endif +#endif + +/* + * Because this file currently only supports platforms which have + * precise powers of 2 as bit sizes for the default integers, the + * least definitions are all trivial. Its possible that a future + * version of this file could have different definitions. + */ + +#ifndef stdint_least_defined + typedef int8_t int_least8_t; + typedef uint8_t uint_least8_t; + typedef int16_t int_least16_t; + typedef uint16_t uint_least16_t; + typedef int32_t int_least32_t; + typedef uint32_t uint_least32_t; +# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER +# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER +# define UINT_LEAST8_MAX UINT8_MAX +# define INT_LEAST8_MAX INT8_MAX +# define UINT_LEAST16_MAX UINT16_MAX +# define INT_LEAST16_MAX INT16_MAX +# define UINT_LEAST32_MAX UINT32_MAX +# define INT_LEAST32_MAX INT32_MAX +# define INT_LEAST8_MIN INT8_MIN +# define INT_LEAST16_MIN INT16_MIN +# define INT_LEAST32_MIN INT32_MIN +# ifdef stdint_int64_defined + typedef int64_t int_least64_t; + typedef uint64_t uint_least64_t; +# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER +# define UINT_LEAST64_MAX UINT64_MAX +# define INT_LEAST64_MAX INT64_MAX +# define INT_LEAST64_MIN INT64_MIN +# endif +#endif +#undef stdint_least_defined + +/* + * The ANSI C committee pretending to know or specify anything about + * performance is the epitome of misguided arrogance. The mandate of + * this file is to *ONLY* ever support that absolute minimum + * definition of the fast integer types, for compatibility purposes. + * No extensions, and no attempt to suggest what may or may not be a + * faster integer type will ever be made in this file. Developers are + * warned to stay away from these types when using this or any other + * stdint.h. + */ + +typedef int_least8_t int_fast8_t; +typedef uint_least8_t uint_fast8_t; +typedef int_least16_t int_fast16_t; +typedef uint_least16_t uint_fast16_t; +typedef int_least32_t int_fast32_t; +typedef uint_least32_t uint_fast32_t; +#define UINT_FAST8_MAX UINT_LEAST8_MAX +#define INT_FAST8_MAX INT_LEAST8_MAX +#define UINT_FAST16_MAX UINT_LEAST16_MAX +#define INT_FAST16_MAX INT_LEAST16_MAX +#define UINT_FAST32_MAX UINT_LEAST32_MAX +#define INT_FAST32_MAX INT_LEAST32_MAX +#define INT_FAST8_MIN INT_LEAST8_MIN +#define INT_FAST16_MIN INT_LEAST16_MIN +#define INT_FAST32_MIN INT_LEAST32_MIN +#ifdef stdint_int64_defined + typedef int_least64_t int_fast64_t; + typedef uint_least64_t uint_fast64_t; +# define UINT_FAST64_MAX UINT_LEAST64_MAX +# define INT_FAST64_MAX INT_LEAST64_MAX +# define INT_FAST64_MIN INT_LEAST64_MIN +#endif + +#undef stdint_int64_defined + +/* + * Whatever piecemeal, per compiler thing we can do about the wchar_t + * type limits. + */ + +#if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__) +# include +# ifndef WCHAR_MIN +# define WCHAR_MIN 0 +# endif +# ifndef WCHAR_MAX +# define WCHAR_MAX ((wchar_t)-1) +# endif +#endif + +/* + * Whatever piecemeal, per compiler/platform thing we can do about the + * (u)intptr_t types and limits. + */ + +#if defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED) +# define STDINT_H_UINTPTR_T_DEFINED +#endif + +#ifndef STDINT_H_UINTPTR_T_DEFINED +# if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64) +# define stdint_intptr_bits 64 +# elif defined (__WATCOMC__) || defined (__TURBOC__) +# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__) +# define stdint_intptr_bits 16 +# else +# define stdint_intptr_bits 32 +# endif +# elif defined (__i386__) || defined (_WIN32) || defined (WIN32) +# define stdint_intptr_bits 32 +# elif defined (__INTEL_COMPILER) +/* TODO -- what did Intel do about x86-64? */ +# endif + +# ifdef stdint_intptr_bits +# define stdint_intptr_glue3_i(a,b,c) a##b##c +# define stdint_intptr_glue3(a,b,c) stdint_intptr_glue3_i(a,b,c) +# ifndef PRINTF_INTPTR_MODIFIER +# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER) +# endif +# ifndef PTRDIFF_MAX +# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) +# endif +# ifndef PTRDIFF_MIN +# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) +# endif +# ifndef UINTPTR_MAX +# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX) +# endif +# ifndef INTPTR_MAX +# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) +# endif +# ifndef INTPTR_MIN +# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) +# endif +# ifndef INTPTR_C +# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x) +# endif +# ifndef UINTPTR_C +# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x) +# endif + typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t; + typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t) intptr_t; +# else +/* TODO -- This following is likely wrong for some platforms, and does + nothing for the definition of uintptr_t. */ + typedef ptrdiff_t intptr_t; +# endif +# define STDINT_H_UINTPTR_T_DEFINED +#endif + +/* + * Assumes sig_atomic_t is signed and we have a 2s complement machine. + */ + +#ifndef SIG_ATOMIC_MAX +# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1) +#endif + +#endif + +#if defined (__TEST_PSTDINT_FOR_CORRECTNESS) + +/* + * Please compile with the maximum warning settings to make sure macros are not + * defined more than once. + */ + +#include +#include +#include + +#define glue3_aux(x,y,z) x ## y ## z +#define glue3(x,y,z) glue3_aux(x,y,z) + +#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,=) glue3(UINT,bits,_C) (0); +#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,=) glue3(INT,bits,_C) (0); + +#define DECL(us,bits) glue3(DECL,us,) (bits) + +#define TESTUMAX(bits) glue3(u,bits,=) glue3(~,u,bits); if (glue3(UINT,bits,_MAX) glue3(!=,u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits) + +int main () { + DECL(I,8) + DECL(U,8) + DECL(I,16) + DECL(U,16) + DECL(I,32) + DECL(U,32) +#ifdef INT64_MAX + DECL(I,64) + DECL(U,64) +#endif + intmax_t imax = INTMAX_C(0); + uintmax_t umax = UINTMAX_C(0); + char str0[256], str1[256]; + + sprintf (str0, "%d %x\n", 0, ~0); + + sprintf (str1, "%d %x\n", i8, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1); + sprintf (str1, "%u %x\n", u8, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1); + sprintf (str1, "%d %x\n", i16, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1); + sprintf (str1, "%u %x\n", u16, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1); + sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1); + sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1); +#ifdef INT64_MAX + sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1); +#endif + sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1); + sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0); + if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1); + + TESTUMAX(8); + TESTUMAX(16); + TESTUMAX(32); +#ifdef INT64_MAX + TESTUMAX(64); +#endif + + return EXIT_SUCCESS; +} + +#endif \ No newline at end of file diff --git a/include/siphash24.c b/include/siphash24.c new file mode 100644 index 0000000..ba8b02f --- /dev/null +++ b/include/siphash24.c @@ -0,0 +1,245 @@ +/* + SipHash reference C implementation + + Written in 2012 by + Jean-Philippe Aumasson + Daniel J. Bernstein + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see . +*/ + +#include +#include +#include "pstdint.h" + +typedef uint64_t u64; +typedef uint32_t u32; +typedef uint8_t u8; + +#define ROTL(x,b) (u64)( ((x) << (b)) | ( (x) >> (64 - (b))) ) + +#define U32TO8_LE(p, v) \ + (p)[0] = (u8)((v) ); (p)[1] = (u8)((v) >> 8); \ + (p)[2] = (u8)((v) >> 16); (p)[3] = (u8)((v) >> 24); + +#define U64TO8_LE(p, v) \ + U32TO8_LE((p), (u32)((v) )); \ + U32TO8_LE((p) + 4, (u32)((v) >> 32)); + +#define U8TO64_LE(p) \ + (((u64)((p)[0]) ) | \ + ((u64)((p)[1]) << 8) | \ + ((u64)((p)[2]) << 16) | \ + ((u64)((p)[3]) << 24) | \ + ((u64)((p)[4]) << 32) | \ + ((u64)((p)[5]) << 40) | \ + ((u64)((p)[6]) << 48) | \ + ((u64)((p)[7]) << 56)) + +#define SIPROUND \ + do { \ + v0 += v1; v1=ROTL(v1,13); v1 ^= v0; v0=ROTL(v0,32); \ + v2 += v3; v3=ROTL(v3,16); v3 ^= v2; \ + v0 += v3; v3=ROTL(v3,21); v3 ^= v0; \ + v2 += v1; v1=ROTL(v1,17); v1 ^= v2; v2=ROTL(v2,32); \ + } while(0) + +/* SipHash-2-4 */ +int crypto_auth( unsigned char *out, const unsigned char *in, unsigned long long inlen, const unsigned char *k ) +{ + /* "somepseudorandomlygeneratedbytes" */ + u64 v0 = 0x736f6d6570736575ULL; + u64 v1 = 0x646f72616e646f6dULL; + u64 v2 = 0x6c7967656e657261ULL; + u64 v3 = 0x7465646279746573ULL; + u64 b; + u64 k0 = U8TO64_LE( k ); + u64 k1 = U8TO64_LE( k + 8 ); + u64 m; + const u8 *end = in + inlen - ( inlen % sizeof( u64 ) ); + const int left = inlen & 7; + b = ( ( u64 )inlen ) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + + for ( ; in != end; in += 8 ) + { + m = U8TO64_LE( in ); +#ifdef DEBUG + printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 ); + printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 ); + printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 ); + printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 ); + printf( "(%3d) compress %08x %08x\n", ( int )inlen, ( u32 )( m >> 32 ), ( u32 )m ); +#endif + v3 ^= m; + SIPROUND; + SIPROUND; + v0 ^= m; + } + + switch( left ) + { + case 7: b |= ( ( u64 )in[ 6] ) << 48; + + case 6: b |= ( ( u64 )in[ 5] ) << 40; + + case 5: b |= ( ( u64 )in[ 4] ) << 32; + + case 4: b |= ( ( u64 )in[ 3] ) << 24; + + case 3: b |= ( ( u64 )in[ 2] ) << 16; + + case 2: b |= ( ( u64 )in[ 1] ) << 8; + + case 1: b |= ( ( u64 )in[ 0] ); break; + + case 0: break; + } + +#ifdef DEBUG + printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 ); + printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 ); + printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 ); + printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 ); + printf( "(%3d) padding %08x %08x\n", ( int )inlen, ( u32 )( b >> 32 ), ( u32 )b ); +#endif + v3 ^= b; + SIPROUND; + SIPROUND; + v0 ^= b; +#ifdef DEBUG + printf( "(%3d) v0 %08x %08x\n", ( int )inlen, ( u32 )( v0 >> 32 ), ( u32 )v0 ); + printf( "(%3d) v1 %08x %08x\n", ( int )inlen, ( u32 )( v1 >> 32 ), ( u32 )v1 ); + printf( "(%3d) v2 %08x %08x\n", ( int )inlen, ( u32 )( v2 >> 32 ), ( u32 )v2 ); + printf( "(%3d) v3 %08x %08x\n", ( int )inlen, ( u32 )( v3 >> 32 ), ( u32 )v3 ); +#endif + v2 ^= 0xff; + SIPROUND; + SIPROUND; + SIPROUND; + SIPROUND; + b = v0 ^ v1 ^ v2 ^ v3; + U64TO8_LE( out, b ); + return 0; +} + +/* + SipHash-2-4 output with + k = 00 01 02 ... + and + in = (empty string) + in = 00 (1 byte) + in = 00 01 (2 bytes) + in = 00 01 02 (3 bytes) + ... + in = 00 01 02 ... 3e (63 bytes) +*/ +u8 vectors[64][8] = +{ + { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, }, + { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, }, + { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, }, + { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, }, + { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, }, + { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, }, + { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, }, + { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, }, + { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, }, + { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, }, + { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, }, + { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, }, + { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, }, + { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, }, + { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, }, + { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, }, + { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, }, + { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, }, + { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, }, + { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, }, + { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, }, + { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, }, + { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, }, + { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, }, + { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, }, + { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, }, + { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, }, + { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, }, + { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, }, + { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, }, + { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, }, + { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, }, + { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, }, + { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, }, + { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, }, + { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, }, + { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, }, + { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, }, + { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, }, + { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, }, + { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, }, + { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, }, + { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, }, + { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, }, + { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, }, + { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, }, + { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, }, + { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, }, + { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, }, + { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, }, + { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, }, + { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, }, + { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, }, + { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, }, + { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, }, + { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, }, + { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, }, + { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, }, + { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, }, + { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, }, + { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, }, + { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, }, + { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, }, + { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, } +}; + + +/* +int test_vectors() +{ +#define MAXLEN 64 + u8 in[MAXLEN], out[8], k[16]; + int i; + int ok = 1; + + for( i = 0; i < 16; ++i ) k[i] = i; + + for( i = 0; i < MAXLEN; ++i ) + { + in[i] = i; + crypto_auth( out, in, i, k ); + + if ( memcmp( out, vectors[i], 8 ) ) + { + printf( "test vector failed for %d bytes\n", i ); + ok = 0; + } + } + + return ok; +} + +int main() +{ + if ( test_vectors() ) printf( "test vectors ok\n" ); + + return 0; +} +*/ diff --git a/include/siphash24.h b/include/siphash24.h new file mode 100644 index 0000000..627d46e --- /dev/null +++ b/include/siphash24.h @@ -0,0 +1,12 @@ +#ifndef SIPHASH_H +#define SIPHASH_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "siphash24.c" + +#ifdef __cplusplus +} +#endif +#endif /* !SIPHASH_H */ diff --git a/setup.py b/setup.py index 435eaf0..104dc79 100644 --- a/setup.py +++ b/setup.py @@ -1,16 +1,28 @@ +import os +from fnmatch import fnmatchcase +from distutils.util import convert_path from distutils.core import setup -from distutils.extension import Extension from Cython.Distutils import build_ext -import os -include_dirs = ['include', '../ulib/src/base'] +from setupconfig import get_extensions -extensions = [ - Extension("extensibletype.extensibletype", - [os.path.join("extensibletype", "extensibletype.pyx"), - '../ulib/src/base/md5sum.c', - '../ulib/src/base/hash.c'], - include_dirs=include_dirs)] +def find_packages(where='.', exclude=()): + out = [] + stack=[(convert_path(where), '')] + while stack: + where, prefix = stack.pop(0) + for name in os.listdir(where): + fn = os.path.join(where,name) + if ('.' not in name and os.path.isdir(fn) and + os.path.isfile(os.path.join(fn, '__init__.py')) + ): + out.append(prefix+name) + stack.append((fn, prefix+name+'.')) + for pat in list(exclude) + ['ez_setup', 'distribute_setup']: + out = [item for item in out if not fnmatchcase(item, pat)] + return out +root = os.path.dirname(os.path.abspath(__file__)) setup(cmdclass={'build_ext': build_ext}, - ext_modules=extensions) + ext_modules=get_extensions(path_prefix=root), + packages=find_packages()) diff --git a/setupconfig.py b/setupconfig.py new file mode 100644 index 0000000..9d1ee8c --- /dev/null +++ b/setupconfig.py @@ -0,0 +1,64 @@ +import os +import functools +from distutils.extension import Extension + +import numpy as np + +def prefix_module(prefix, module_name): + if prefix: + return "%s.%s" % (prefix, module_name) + + return module_name + +def prefix_path(prefix, path): + if prefix: + return "%s/%s" % (prefix.rstrip("/"), path.lstrip("/")) + + return path + +def make_extension(path_prefix, module_prefix, modname, sources, depends, **kwds): + _prefix_path = functools.partial(prefix_path, path_prefix) + + return Extension( + prefix_module(module_prefix, modname), + sources=list(map(_prefix_path, sources)), + depends=list(map(_prefix_path, depends)), + **kwds + ) + +def get_extensions(path_prefix, module_prefix=""): + include_dirs = [prefix_path(path_prefix, 'include'), + np.get_include()] + + perfecthash_deps = ["include/perfecthash.h"] + + Extension = functools.partial(make_extension, path_prefix, module_prefix) + + extensions = [ + Extension("extensibletype.extensibletype", + ["extensibletype/extensibletype.pyx", + #'../ulib/src/base/md5sum.c', + #'../ulib/src/base/hash.c' + ], + include_dirs=include_dirs, + depends=perfecthash_deps), + + Extension("extensibletype.intern", + ["extensibletype/intern.pyx"], + include_dirs=include_dirs, + depends=["include/globalinterning.h", + "include/interning.h", + "include/perfecthash.h"]), + + Extension("extensibletype.methodtable", + ["extensibletype/methodtable.pyx"], + include_dirs=include_dirs, + depends=perfecthash_deps), + + Extension("extensibletype.test.pstdint", + ["extensibletype/test/pstdint.pyx"], + include_dirs=include_dirs, + depends=["include/pstdint.h"]), + ] + + return extensions