Skip to content

Commit 28e4331

Browse files
committed
[draft] gf_ring
1 parent 497e57d commit 28e4331

File tree

3 files changed

+170
-133
lines changed

3 files changed

+170
-133
lines changed

src/gf_ring.cpp

Lines changed: 97 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -38,84 +38,114 @@ namespace quadiron {
3838
namespace gf {
3939

4040
template <>
41-
void RingModN<uint16_t>::neg(size_t n, uint16_t* x) const
41+
void RingModN<uint16_t>::neg(vec::Buffers<uint16_t>& buf, unsigned buf_id) const
4242
{
43-
simd::neg(n, x, this->_card);
44-
}
43+
size_t size = buf.get_size();
44+
const unsigned ratio = simd::countof<uint16_t>();
45+
const size_t simd_vec_len = size / ratio;
46+
const size_t simd_trailing_len = size - simd_vec_len * ratio;
4547

46-
template <>
47-
void RingModN<uint32_t>::neg(size_t n, uint32_t* x) const
48-
{
49-
simd::neg(n, x, this->_card);
50-
}
48+
simd::neg(buf, buf_id, simd_vec_len, this->_card);
5149

52-
template <>
53-
void RingModN<uint32_t>::mul_coef_to_buf(
54-
uint32_t a,
55-
uint32_t* src,
56-
uint32_t* dest,
57-
size_t len) const
58-
{
59-
simd::mul_coef_to_buf(a, src, dest, len, this->_card);
50+
if (simd_trailing_len) {
51+
const size_t simd_offset = simd_vec_len * ratio;
52+
for (size_t i = simd_offset; i < size; ++i) {
53+
uint16_t hi, lo;
54+
buf.get(buf_id, i, hi, lo);
55+
buf.set(buf_id, i, neg(hi), neg(lo));
56+
}
57+
}
6058
}
6159

6260
template <>
63-
void RingModN<uint32_t>::add_two_bufs(uint32_t* src, uint32_t* dest, size_t len)
64-
const
61+
void RingModN<uint32_t>::neg(vec::Buffers<uint32_t>& buf, unsigned buf_id) const
6562
{
66-
simd::add_two_bufs(src, dest, len, this->_card);
67-
}
63+
size_t size = buf.get_size();
64+
const unsigned ratio = simd::countof<uint32_t>();
65+
const size_t simd_vec_len = size / ratio;
66+
const size_t simd_trailing_len = size - simd_vec_len * ratio;
6867

69-
template <>
70-
void RingModN<uint32_t>::sub_two_bufs(
71-
uint32_t* bufa,
72-
uint32_t* bufb,
73-
uint32_t* res,
74-
size_t len) const
75-
{
76-
simd::sub_two_bufs(bufa, bufb, res, len, this->_card);
77-
}
68+
if (simd_vec_len) {
69+
simd::neg(buf, buf_id, simd_vec_len, this->_card);
70+
}
7871

79-
template <>
80-
void RingModN<uint16_t>::mul_coef_to_buf(
81-
uint16_t a,
82-
uint16_t* src,
83-
uint16_t* dest,
84-
size_t len) const
85-
{
86-
simd::mul_coef_to_buf(a, src, dest, len, this->_card);
72+
if (simd_trailing_len) {
73+
const size_t simd_offset = simd_vec_len * ratio;
74+
for (size_t i = simd_offset; i < size; ++i) {
75+
uint32_t hi, lo;
76+
buf.get(buf_id, i, hi, lo);
77+
buf.set(buf_id, i, neg(hi), neg(lo));
78+
}
79+
}
8780
}
8881

89-
template <>
90-
void RingModN<uint16_t>::add_two_bufs(uint16_t* src, uint16_t* dest, size_t len)
91-
const
92-
{
93-
simd::add_two_bufs(src, dest, len, this->_card);
94-
}
95-
96-
template <>
97-
void RingModN<uint16_t>::sub_two_bufs(
98-
uint16_t* bufa,
99-
uint16_t* bufb,
100-
uint16_t* res,
101-
size_t len) const
102-
{
103-
simd::sub_two_bufs(bufa, bufb, res, len, this->_card);
104-
}
105-
106-
template <>
107-
void RingModN<uint16_t>::hadamard_mul(int n, uint16_t* x_u16, uint16_t* y_u16)
108-
const
109-
{
110-
simd::mul_two_bufs(y_u16, x_u16, n, this->_card);
111-
}
112-
113-
template <>
114-
void RingModN<uint32_t>::hadamard_mul(int n, uint32_t* x_u32, uint32_t* y_u32)
115-
const
116-
{
117-
simd::mul_two_bufs(y_u32, x_u32, n, this->_card);
118-
}
82+
// template <>
83+
// void RingModN<uint32_t>::mul_coef_to_buf(
84+
// uint32_t a,
85+
// uint32_t* src,
86+
// uint32_t* dest,
87+
// size_t len) const
88+
// {
89+
// simd::mul_coef_to_buf(a, src, dest, len, this->_card);
90+
// }
91+
//
92+
// template <>
93+
// void RingModN<uint32_t>::add_two_bufs(uint32_t* src, uint32_t* dest, size_t len)
94+
// const
95+
// {
96+
// simd::add_two_bufs(src, dest, len, this->_card);
97+
// }
98+
//
99+
// template <>
100+
// void RingModN<uint32_t>::sub_two_bufs(
101+
// uint32_t* bufa,
102+
// uint32_t* bufb,
103+
// uint32_t* res,
104+
// size_t len) const
105+
// {
106+
// simd::sub_two_bufs(bufa, bufb, res, len, this->_card);
107+
// }
108+
//
109+
// template <>
110+
// void RingModN<uint16_t>::mul_coef_to_buf(
111+
// uint16_t a,
112+
// uint16_t* src,
113+
// uint16_t* dest,
114+
// size_t len) const
115+
// {
116+
// simd::mul_coef_to_buf(a, src, dest, len, this->_card);
117+
// }
118+
//
119+
// template <>
120+
// void RingModN<uint16_t>::add_two_bufs(uint16_t* src, uint16_t* dest, size_t len)
121+
// const
122+
// {
123+
// simd::add_two_bufs(src, dest, len, this->_card);
124+
// }
125+
//
126+
// template <>
127+
// void RingModN<uint16_t>::sub_two_bufs(
128+
// uint16_t* bufa,
129+
// uint16_t* bufb,
130+
// uint16_t* res,
131+
// size_t len) const
132+
// {
133+
// simd::sub_two_bufs(bufa, bufb, res, len, this->_card);
134+
// }
135+
//
136+
// template <>
137+
// void RingModN<uint16_t>::hadamard_mul(int n, uint16_t* x_u16, uint16_t* y_u16)
138+
// const
139+
// {
140+
// simd::mul_two_bufs(y_u16, x_u16, n, this->_card);
141+
// }
142+
//
143+
// template <>
144+
// void RingModN<uint32_t>::hadamard_mul(int n, uint32_t* x_u32, uint32_t* y_u32)
145+
// const
146+
// {
147+
// simd::mul_two_bufs(y_u32, x_u32, n, this->_card);
148+
// }
119149

120150
} // namespace gf
121151
} // namespace quadiron

src/gf_ring.h

Lines changed: 58 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ class RingModN {
127127
T get_code_len(T n) const;
128128
T get_code_len_high_compo(T n) const;
129129
virtual void hadamard_mul(int n, T* x, T* y) const;
130-
virtual void neg(size_t n, T* x) const;
130+
virtual void neg(vec::Buffers<T>& buf, unsigned buf_id) const;
131131
virtual void neg(vec::Buffers<T>& buf) const;
132132

133133
RingModN(RingModN&&) = default;
@@ -410,7 +410,7 @@ inline void RingModN<T>::mul_vec_to_vecp(
410410
}
411411
} else {
412412
dest.copy(src, i, i);
413-
this->neg(len, dest_mem[i]);
413+
this->neg(dest, i);
414414
}
415415
}
416416
}
@@ -846,12 +846,23 @@ inline void RingModN<T>::hadamard_mul(int n, T* x, T* y) const
846846
}
847847

848848
template <typename T>
849-
inline void RingModN<T>::neg(size_t n, T* x) const
849+
inline void RingModN<T>::neg(vec::Buffers<T>& buf, unsigned buf_id) const
850850
{
851-
// negate every element of `x` in place: x[i] = 0 - x[i]
852-
for (size_t i = 0; i < n; i++) {
853-
x[i] = sub(0, x[i]);
851+
size_t size = buf.get_size();
852+
if (buf.has_meta()) {
853+
for (size_t i = 0; i < size; ++i) {
854+
T hi = 0, lo = 0;
855+
buf.get(buf_id, i, hi, lo);
856+
buf.set(buf_id, i, neg(hi), neg(lo));
857+
}
858+
} else {
859+
T* x = buf.get(buf_id);
860+
// negate every element of `x` in place: x[i] = 0 - x[i]
861+
for (size_t i = 0; i < size; ++i) {
862+
x[i] = sub(0, x[i]);
863+
}
854864
}
865+
855866
}
856867

857868
template <typename T>
@@ -868,7 +879,7 @@ inline void RingModN<T>::neg(vec::Buffers<T>& buf) const
868879
}
869880
} else {
870881
for (int i = 0; i < buf.get_n(); i++) {
871-
neg(size, buf.get(i));
882+
neg(buf, i);
872883
}
873884
}
874885
}
@@ -877,56 +888,51 @@ inline void RingModN<T>::neg(vec::Buffers<T>& buf) const
877888
/* Operations are vectorized by SIMD */
878889

879890
template <>
880-
void RingModN<uint16_t>::neg(size_t n, uint16_t* x) const;
881-
882-
template <>
883-
void RingModN<uint32_t>::neg(size_t n, uint32_t* x) const;
884-
885-
template <>
886-
void RingModN<uint16_t>::mul_coef_to_buf(
887-
uint16_t a,
888-
uint16_t* src,
889-
uint16_t* dest,
890-
size_t len) const;
891-
892-
template <>
893-
void RingModN<uint32_t>::mul_coef_to_buf(
894-
uint32_t a,
895-
uint32_t* src,
896-
uint32_t* dest,
897-
size_t len) const;
898-
899-
template <>
900-
void RingModN<uint16_t>::add_two_bufs(uint16_t* src, uint16_t* dest, size_t len)
901-
const;
902-
903-
template <>
904-
void RingModN<uint32_t>::add_two_bufs(uint32_t* src, uint32_t* dest, size_t len)
905-
const;
891+
void RingModN<uint16_t>::neg(vec::Buffers<uint16_t>& buf, unsigned buf_id) const;
906892

907893
template <>
908-
void RingModN<uint16_t>::sub_two_bufs(
909-
uint16_t* bufa,
910-
uint16_t* bufb,
911-
uint16_t* res,
912-
size_t len) const;
894+
void RingModN<uint32_t>::neg(vec::Buffers<uint32_t>& buf, unsigned buf_id) const;
913895

914-
template <>
915-
void RingModN<uint32_t>::sub_two_bufs(
916-
uint32_t* bufa,
917-
uint32_t* bufb,
918-
uint32_t* res,
919-
size_t len) const;
920-
921-
template <>
922-
void RingModN<uint16_t>::hadamard_mul(int n, uint16_t* x, uint16_t* y) const;
923-
template <>
924-
void RingModN<uint32_t>::hadamard_mul(int n, uint32_t* x, uint32_t* y) const;
925896
// template <>
926-
// void RingModN<uint64_t>::hadamard_mul(int n, uint64_t* x, uint64_t* y) const;
897+
// void RingModN<uint16_t>::mul_coef_to_buf(
898+
// uint16_t a,
899+
// uint16_t* src,
900+
// uint16_t* dest,
901+
// size_t len) const;
902+
//
903+
// template <>
904+
// void RingModN<uint32_t>::mul_coef_to_buf(
905+
// uint32_t a,
906+
// uint32_t* src,
907+
// uint32_t* dest,
908+
// size_t len) const;
909+
//
910+
// template <>
911+
// void RingModN<uint16_t>::add_two_bufs(uint16_t* src, uint16_t* dest, size_t len)
912+
// const;
913+
//
914+
// template <>
915+
// void RingModN<uint32_t>::add_two_bufs(uint32_t* src, uint32_t* dest, size_t len)
916+
// const;
917+
//
918+
// template <>
919+
// void RingModN<uint16_t>::sub_two_bufs(
920+
// uint16_t* bufa,
921+
// uint16_t* bufb,
922+
// uint16_t* res,
923+
// size_t len) const;
924+
//
925+
// template <>
926+
// void RingModN<uint32_t>::sub_two_bufs(
927+
// uint32_t* bufa,
928+
// uint32_t* bufb,
929+
// uint32_t* res,
930+
// size_t len) const;
931+
//
932+
// template <>
933+
// void RingModN<uint16_t>::hadamard_mul(int n, uint16_t* x, uint16_t* y) const;
927934
// template <>
928-
// void RingModN<__uint128_t>::hadamard_mul(int n, __uint128_t* x, __uint128_t*
929-
// y) const;
935+
// void RingModN<uint32_t>::hadamard_mul(int n, uint32_t* x, uint32_t* y) const;
930936

931937
#endif // #ifdef QUADIRON_USE_SIMD
932938

src/simd_ring.h

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -146,22 +146,23 @@ inline void mul_two_bufs(T* src, T* dest, size_t len, T card)
146146
/** Apply an element-wise negation to a buffer
147147
*/
148148
template <typename T>
149-
inline void neg(size_t len, T* buf, T card)
149+
inline void neg(vec::Buffers<T>& buf, unsigned buf_id, size_t simd_vec_len, T card)
150150
{
151-
VecType* _buf = reinterpret_cast<VecType*>(buf);
152-
const unsigned ratio = sizeof(*_buf) / sizeof(*buf);
153-
const size_t _len = len / ratio;
154-
const size_t _last_len = len - _len * ratio;
151+
T* data = buf.get(buf_id);
152+
uint8_t* meta = buf.get_meta(buf_id);
153+
VecType* vec_data = reinterpret_cast<VecType*>(data);
154+
MetaType* vec_meta = reinterpret_cast<MetaType*>(meta);
155155

156-
size_t i;
157-
for (i = 0; i < _len; i++) {
158-
_buf[i] = mod_neg(_buf[i], card);
159-
}
160-
if (_last_len > 0) {
161-
for (i = _len * ratio; i < len; i++) {
162-
if (buf[i])
163-
buf[i] = card - buf[i];
164-
}
156+
for (size_t i = 0; i < simd_vec_len; ++i) {
157+
VecType lo, hi;
158+
VecType x = load_to_reg(vec_data);
159+
160+
unpack<T>(vec_meta[i], x, hi, lo);
161+
hi = mod_neg(hi, card);
162+
lo = mod_neg(lo, card);
163+
pack<T>(lo, hi, x, vec_meta[i]);
164+
165+
store_to_mem(vec_data++, x);
165166
}
166167
}
167168

0 commit comments

Comments
 (0)