-
Notifications
You must be signed in to change notification settings - Fork 4
/
nx_deflate.c
2189 lines (1821 loc) · 64.7 KB
/
nx_deflate.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* NX-GZIP compression accelerator user library
* implementing zlib compression library interfaces
*
* Copyright (C) IBM Corporation, 2011-2017
*
* Licenses for GPLv2 and Apache v2.0:
*
* GPLv2:
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
* Apache v2.0:
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Authors: Bulent Abali <[email protected]>
* Xiao Lei Hu <[email protected]>
*/
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <assert.h>
#include <errno.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <endian.h>
#include <pthread.h>
#include "zlib.h"
#include "nx_dbg.h"
#include "copy-paste.h"
#include "nx-ftw.h"
#include "nxu.h"
#include "nx_zlib.h"
#include "nx.h"
#include "nx_dbg.h"
#include "nx_dht.h"
/* memLevel value that nx_deflateInit_() forwards to nx_deflateInit2_() */
#define DEF_MEM_LEVEL 8
/* Convenience wrapper mirroring zlib's deflateInit(): supplies the zlib
   version string and sizeof(z_stream) to nx_deflateInit_() */
#define nx_deflateInit(strm, level) nx_deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
/* presumably the maximum dynamic Huffman table length -- TODO confirm units */
#define DEF_MAX_DHT_LEN 288
/* 32 KiB; matches the 2^15 window size that nx_deflateInit2_() insists on */
#define DEF_HIST_LEN (1<<15)
/* 64 KiB */
#define DEF_MIN_INPUT_LEN (1UL<<16)
/* 128 KiB; nx_deflateInit2_() uses this as the lower bound for the
   fifo_out allocation */
#define DEF_MAX_EXPANSION_LEN (2 * DEF_MIN_INPUT_LEN)
/* deflateSetDictionary constants */
#define DEF_MAX_DICT_LEN ((1L<<15)-272)
#define DEF_DICT_THRESHOLD (1<<8) /* TODO make this config variable */
/* Compacts fifo_out: once the read offset cur_out has moved past half
   of len_out, the used_out pending bytes are slid to the start of the
   buffer and cur_out is reset. cur_out is also reset whenever the fifo
   is empty (used_out == 0). */
#define fifo_out_len_check(s) \
do { if ((s)->cur_out > (s)->len_out/2) { \
memmove((s)->fifo_out, (s)->fifo_out + (s)->cur_out, (s)->used_out); \
(s)->cur_out = 0; } if ((s)->used_out == 0) { (s)->cur_out = 0; } \
} while(0)
/* Same compaction for fifo_in, without the empty-fifo reset. */
#define fifo_in_len_check(s) \
do { if ((s)->cur_in > (s)->len_in/2) { \
memmove((s)->fifo_in, (s)->fifo_in + (s)->cur_in, (s)->used_in); \
(s)->cur_in = 0; } \
} while(0)
/* Stores the low 8 bits of c at fifo_out[used_out] and increments
   used_out. No bounds check is performed.
   NOTE(review): the index does not add cur_out -- presumably only used
   while cur_out == 0; confirm against the callers. */
#define put_byte(s, c) {(s)->fifo_out[(s)->used_out++] = (Bytef)((c) & 0xff);}
/* Appends the low 16 bits of b to fifo_out, most significant byte first. */
#define put_short(s, b) do { \
put_byte((s), (Byte)(((b) >> 8) & 0xff)); \
put_byte((s), (Byte)(((b) >> 0) & 0xff)); \
} while(0)
/* Appends the low 32 bits of b to fifo_out, most significant byte first. */
#define put_int(s, b) do { \
put_byte((s), (Byte)(((b) >> 24) & 0xff)); \
put_byte((s), (Byte)(((b) >> 16) & 0xff)); \
put_byte((s), (Byte)(((b) >> 8) & 0xff)); \
put_byte((s), (Byte)(((b) >> 0) & 0xff)); \
} while(0)
#define NXGZIP_TYPE 9 /* 9 for P9 */
/* Classic min/max macros: each argument may be evaluated twice, so do
   not pass expressions with side effects. */
#define NX_MIN(X,Y) (((X)<(Y))?(X):(Y))
#define NX_MAX(X,Y) (((X)>(Y))?(X):(Y))
/* Debug aid: prints the current source file and line to stderr. */
#define DBGLINE fprintf(stderr, "code at %s:%d\n", __FILE__, __LINE__ )
//#define ASSERT(X) assert(X)
#ifndef __unused
# define __unused __attribute__((unused))
#endif
/* Branch prediction hints (GCC/Clang builtin). */
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
/* config variables */
static const int nx_stored_block_len = 32768;
static uint32_t nx_max_byte_count_low = (1UL<<30);
static uint32_t nx_max_byte_count_high = (1UL<<30);
/* upper limit on the number of entries in an indirect DDE list; checked
   by nx_get_dde_byte_count() */
static uint32_t nx_max_source_dde_count = MAX_DDE_COUNT;
static uint32_t nx_max_target_dde_count = MAX_DDE_COUNT;
/* All three return-code families are plain ints; the distinct names
   only document which code space a function returns. */
typedef int retlibnx_t;
typedef int retz_t;
typedef int retnx_t;
/* defined elsewhere; when it is 0 nx_deflateInit2_() forces Z_FIXED */
extern int nx_strategy_override;
/* **************************************************************** */
/* libnx return codes (retlibnx_t): values below 0x10 are non-error
   outcomes, values from 0x10 upwards are errors. */
#define LIBNX_OK 0x00
#define LIBNX_OK_SUSPEND 0x01
#define LIBNX_OK_BIG_TARGET 0x02
#define LIBNX_OK_DRYRUN 0x03
#define LIBNX_OK_NO_AVOUT 0x04
#define LIBNX_OK_NO_AVIN 0x05
#define LIBNX_OK_STREAM_END 0x06
#define LIBNX_ERR_NO_MEM 0x10
#define LIBNX_ERR_PAGEFLT 0x20
#define LIBNX_ERR_ARG 0x30
#define LIBNX_ERR_HISTLEN 0x40
#define LIBNX_ERROR 0x50
/* Stream status borrowed from deflate.h */
/* Each status is a distinct bit, so several can be tested at once with
   a mask, e.g. (NX_BFINAL_ST|NX_TRAILER_ST). The 0b... binary literals
   are a compiler extension before C23. */
#define NX_INIT_ST 0b000000 /* 0x00 */
#define NX_RAW_INIT_ST 0b000001 /* 0x01 deflateInit2() called */
#define NX_ZLIB_INIT_ST 0b000010 /* 0x02 deflateInit2() called */
#define NX_GZIP_INIT_ST 0b000100 /* 0x04 deflateInit2() called */
#define NX_DEFLATE_ST 0b001000 /* 0x08 deflate() called once */
#define NX_BFINAL_ST 0b010000 /* 0x10 bfinal was set */
#define NX_TRAILER_ST 0b100000 /* 0x20 trailers appended */
/*
 * Sets or clears the deflate block BFINAL bit.
 *
 * buf     points at the byte that holds the bit
 * bfinal  nonzero sets the bit, zero clears it
 * offset  bit position of BFINAL inside that byte (0 = least significant)
 *
 * All other bits of the byte are left untouched.
 */
static inline void set_bfinal(void *buf, int bfinal, int offset)
{
	char *first = buf;
	const unsigned char bit = (unsigned char) (1 << offset);

	if (!bfinal) {
		*first &= ~bit;
		return;
	}
	*first |= bit;
}
/*
 * Writes a stored (BTYPE=00) block header -- BFINAL, BTYPE, padding to
 * the next byte boundary, then LEN and NLEN -- at the end of a deflate
 * bit stream.
 *
 * buf        first free byte of the stream. When tebc is nonzero the
 *            byte at buf-1 is only partially used: its low tebc bits
 *            are kept and the header starts in its free upper bits.
 * tebc       number of bits already occupied in the byte at buf-1 (0..7)
 * final      low bit is written as BFINAL
 * block_len  LEN field, must be below 0x10000; use 0 for the empty
 *            block of a sync or full flush
 *
 * Returns the number of bytes appended from buf onwards (4 or 5); the
 * completed byte at buf-1 is not counted.
 */
static inline int append_btype00_header(char *buf, uint32_t tebc, int final, int block_len)
{
	uint64_t bits, len_nlen;
	int32_t nbits = (int32_t) (tebc & 0x7);
	char *out = buf;

	ASSERT(!!buf && tebc < 8);
	ASSERT(block_len < 0x10000);

	if (tebc == 0) {
		/* stream is byte aligned: start from a clean byte */
		*out = 0;
	} else {
		/* step back to the partially filled byte and drop its unused bits */
		--out;
		*out = *out & (unsigned char)((1<<tebc)-1);
	}

	/* NLEN is the one's complement of LEN and sits in the upper 16 bits */
	len_nlen = (uint64_t) block_len; /* TODO check bi-endian support */
	len_nlen = ((~len_nlen << 16) | len_nlen) & 0xffffffffULL;

	/* BFINAL goes right after the existing bits; BTYPE=00 needs no bits set */
	bits = *out | ((0x1ULL & final) << nbits);

	/* the 3 header bits either fit in the current byte or spill in to a second one */
	nbits = (nbits + 3 > 8) ? 16 : 8;
	bits |= len_nlen << nbits;

	/* emit header byte(s) plus the 4 LEN/NLEN bytes, low byte first */
	for (nbits += 32; nbits > 0; nbits -= 8) {
		*out++ = (unsigned char)(bits & 0xffULL);
		bits >>= 8;
	}

	return (tebc == 0 || tebc > 5) ? 5 : 4;
}
/*
TODO If tebc=0 do not append sync flush.
Do a sync flush followed by a single partial flush.
Sync flush ensures that the single partial flush test
succeeds (bolet.org describes 1 or 2 partials scenario)
When stream continues, start the new deflate() call
with a sync flush for byte alignment if required
*/
/*
* All flush functions assume that the current block has been
* closed. Sync and full flush blocks are identical; only the
* treatment of the history is different.
*/
/*
 * Appends an empty stored block (BFINAL, BTYPE=00, padding, LEN=0x0000,
 * NLEN=0xFFFF) to the bit stream, which leaves the stream byte aligned.
 *
 * buf    first free byte of the stream. When tebc is nonzero the byte
 *        at buf-1 is partially used: its low tebc bits are kept.
 * tebc   number of bits already occupied in the byte at buf-1 (0..7)
 * final  low bit is written as BFINAL
 *
 * Returns the number of bytes appended from buf onwards (4 or 5); the
 * completed byte at buf-1 is not counted.
 */
static int inline append_sync_flush(char *buf, uint32_t tebc, int final)
{
	uint64_t bits;
	int32_t nbits = (int32_t) (tebc & 0x7);
	char *out = buf;

	prt_info("%s tebc %d final %d\n", __FUNCTION__, tebc, final);

	if (tebc == 0) {
		/* byte aligned already: start from a clean byte */
		*out = 0;
	} else {
		/* back up to the partially filled byte, clear its unused bits */
		--out;
		*out = *out & (unsigned char)((1<<tebc)-1);
	}

	/* BFINAL follows the existing bits; BTYPE=00 contributes zeros only */
	bits = *out | ((0x1ULL & final) << nbits);

	/* header bits end within the current byte or within the next one */
	nbits = (nbits + 3 > 8) ? 16 : 8;
	bits |= (0xFFFF0000ULL) << nbits; /* zero length block */

	for (nbits += 32; nbits > 0; nbits -= 8) {
		*out++ = (unsigned char)(bits & 0xffULL);
		bits >>= 8;
	}

	/* bytes appended; excludes the padded partial byte */
	return (tebc == 0 || tebc > 5) ? 5 : 4;
}
/*
 * Appends a full flush block. The bytes written are the same as for a
 * sync flush, so this simply forwards to append_sync_flush() and
 * returns its byte count.
 */
static int inline append_full_flush(char *buf, uint32_t tebc, int final)
{
	int appended = append_sync_flush(buf, tebc, final);

	return appended;
}
/*
 * Appends a 10-bit partial flush -- an empty fixed-Huffman block made
 * of BFINAL (1 bit), BTYPE=01 (2 bits) and the 7-bit all-zero
 * end-of-block code -- to the bit stream ending at buf.
 *
 * buf    first free byte of the stream. When *tebc > 0 the byte at
 *        buf-1 is partially filled; its low *tebc bits are preserved
 *        and the new bits are placed above them.
 * tebc   in:  number of bits already used in the byte at buf-1 (0..7)
 *        out: number of bits used in the last byte written (0..7)
 * final  low bit is written as BFINAL
 *
 * Returns the number of bytes appended from the original buf onwards,
 * counting a trailing partially filled byte but not the byte at buf-1.
 */
static int inline append_partial_flush(char *buf, uint32_t *tebc, int final)
{
	uint64_t flush;
	int32_t shift;
	int bytes = 0;

	/* validate before the first dereference of tebc */
	ASSERT(!!buf && !!tebc && *tebc < 8);
	shift = (*tebc & 0x7);

	prt_info("%s tebc %d final %d\n", __FUNCTION__, *tebc, final);

	if (*tebc > 0) {
		/* last byte is partially full */
		buf = buf - 1;
		/* keep existing bits, mask out upper bits */
		*buf = *buf & (unsigned char)((1<<*tebc)-1);
	}
	else *buf = 0;

	/* write BFINAL=0|1 and BTYPE=01 and EOB=0000000 above the bits
	   already present; without OR-ing *buf back in, the loop below
	   would overwrite the preserved bits with zeros */
	flush = ((0x2ULL | (0x1ULL & final)) << shift) | (unsigned char) *buf;
	shift = shift + 10;
	*tebc = shift % 8; /* TODO check if we need tebc=8 later; 0/8 are same */

	/* 10 bits starting on a byte boundary, or 17 bits starting in the
	   previous byte, reach in to a second new byte */
	bytes = (shift == 10 || shift == 17)? 2: 1;

	while (shift > 0) {
		*buf++ = (unsigned char)(flush & 0xffULL);
		flush = flush >> 8;
		shift = shift - 8;
	}
	return bytes;
}
/*
Appends a flush block that may cross over from the user buffer
next_out in to the internal buffer fifo_out.

s      stream; fifo_out must be empty on entry (see the ASSERT)
flush  Z_SYNC_FLUSH, Z_FULL_FLUSH or Z_PARTIAL_FLUSH; any other value
       writes nothing and returns 0
tebc   number of bits already used in the byte at next_out-1 (0..7)
final  BFINAL value for the flush block

Returns the number of bytes appended (next_out and fifo_out combined).
Updates s->tebc, and advances both s and s->zstrm through
update_stream_out() for every byte placed in the user buffer.
*/
static int append_spanning_flush(nx_streamp s, int flush, uint32_t tebc, int final)
{
char tmp[16]; /* issue 106 buffer overran */
char *ptr;
int nb, k;
uint32_t next_tebc = tebc;
prt_info("%s flush %d tebc %d final %d\n", __FUNCTION__, flush, tebc, final);
/* assumes fifo_out is empty */
ASSERT(s->used_out == 0 && s->cur_out == 0);
/* fast path: a sync/full flush block is at most 5 new bytes, so with
   more than 5 bytes of room it fits entirely in the user buffer */
if (s->avail_out > 5 && (flush == Z_SYNC_FLUSH || flush == Z_FULL_FLUSH)) {
/* directly update the user stream */
nb = append_sync_flush(s->next_out, tebc, final);
s->tebc = 0;
update_stream_out(s, nb);
update_stream_out(s->zstrm, nb);
return nb;
}
/* the block spans next_out and fifo_out therefore using the tmp buffer */
/* copy last byte to tmp which may be partially empty */
if (tebc > 0) {
tmp[0] = *(s->next_out - 1);
}
/* tmp[0] stands in for the byte at next_out-1, tmp[1] for next_out[0] */
ptr = &tmp[1];
if (flush == Z_SYNC_FLUSH || flush == Z_FULL_FLUSH) {
nb = append_sync_flush(ptr, tebc, final);
s->tebc = 0;
}
else if (flush == Z_PARTIAL_FLUSH) {
/* we always put TWO empty type 1 blocks */
/* nb = append_partial_flush(ptr, &next_tebc, 0); */
/* sync flush eliminates the need for testing for 1 or
2 partial flushes; see bolet.org */
nb = append_sync_flush(ptr, tebc, 0);
/* the sync flush left the staged stream byte aligned */
next_tebc = 0;
ptr += nb;
nb += append_partial_flush(ptr, &next_tebc, final);
/* save partial last byte bit count for later */
s->tebc = next_tebc;
}
else return 0;
/* put the filled partial byte back in to the stream */
if (tebc > 0) {
*(s->next_out - 1) = tmp[0];
}
/* now copy the flush block to the stream possibly
overflowing in to fifo_out */
k = 0;
/* copying in to user buffer starting from tmp[1] */
while (s->avail_out > 0 && k < nb) {
*s->next_out = tmp[k+1];
update_stream_out(s, 1);
update_stream_out(s->zstrm, 1);
++k;
}
/* overflowing any remainder in to fifo_out */
while (k < nb) {
*(s->fifo_out + s->cur_out + s->used_out) = tmp[k+1];
++k;
++s->used_out;
}
/* If next_tebc > 0 we cannot return a partial byte to the
user. We must withhold the last partial byte of
Z_PARTIAL_FLUSH and save it in fifo_out. When we're ready
to copy fifo_out to next_out, we should pad this partial
byte with a sync_flush (unless this is the final block).
TODO check user values total_out and avail_out for
consistency */
if (s->used_out == 0) {
/* we're here if there may be fractional byte stored
in next_out due to Z_PARTIAL_FLUSH; we must move
that byte in to fifo_out and expect it to be padded
later with a sync flush */
if (s->tebc > 0) {
ASSERT(flush == Z_PARTIAL_FLUSH);
/* because partial flush is 10 bits and it
follows byte aligned sync flush */
ASSERT(s->tebc == 2);
/* rewind */
update_stream_out(s, -1);
update_stream_out(s->zstrm, -1);
/* copy the partial byte to fifo_out */
*s->fifo_out = *s->next_out;
s->used_out = 1;
s->cur_out = 0;
/* the withheld byte no longer counts as appended */
-- nb;
}
}
return nb;
}
/*
 * Rewrites the BFINAL and LEN/NLEN fields of a stored block header
 * that was written earlier and that may straddle the user buffer and
 * fifo_out.
 *
 * s          stream whose fifo_out received any overflowed header bytes
 *            (they are expected at the very start of fifo_out)
 * buf        position in the user buffer where the header bytes start
 * avail_out  bytes of user buffer available starting at buf
 * tebc       number of bits already used in the byte at buf-1 (0..7)
 * final      BFINAL value to store
 * block_len  LEN value to store (below 0x10000)
 *
 * Returns the number of header bytes written from buf onwards (4 or 5),
 * as reported by append_btype00_header().
 */
static int rewrite_spanning_flush(nx_streamp s, char *buf, uint32_t avail_out, uint32_t tebc, int final, uint32_t block_len)
{
	char tmp[6];
	char *ptr;
	int nb, j, k;

	if (avail_out > 5) {
		/* directly update the user stream */
		nb = append_btype00_header(buf, tebc, final, block_len);
		return nb;
	}

	/* the block spans next_out and fifo_out therefore using the tmp buffer */
	/* copy last byte to tmp which may be partially empty */
	if (tebc > 0) tmp[0] = *(buf - 1);
	ptr = &tmp[1];
	nb = append_btype00_header(ptr, tebc, final, block_len);

	/* put the filled partial byte back in to the stream */
	if (tebc > 0) *(buf - 1) = tmp[0];

	/* now copy the header to the stream, possibly overflowing in to
	   fifo_out. The header is nb bytes long (5 when tebc is 0 or
	   above 5), so copy nb bytes rather than a fixed 4; otherwise the
	   last NLEN byte would never be rewritten. */
	k = 0;
	while (avail_out > 0 && k < nb) {
		*buf++ = tmp[k+1];
		++k;
		--avail_out;
	}

	/* overflowing any remainder in to fifo_out */
	j = 0;
	while (k < nb) {
		*(s->fifo_out + j) = tmp[k+1];
		++k; ++j;
	}
	return nb;
}
/*
 * Appends the stream trailer that matches s->wrap through nx_put_byte():
 *   HEADER_GZIP: 4 bytes of s->crc32 followed by 4 bytes holding the
 *                low 32 bits of s->total_in
 *   HEADER_ZLIB: 4 bytes of s->adler32
 *   HEADER_RAW:  nothing
 *
 * Returns 0 when no trailer is written.
 * NOTE(review): for gzip and zlib the value returned is the loop
 * counter left over by "while (k++ < 4)", i.e. 5, not the number of
 * bytes written (8 and 4) -- confirm that no caller relies on it.
 */
static inline int nx_compress_append_trailer(nx_streamp s)
{
int k;
if (s->wrap == HEADER_GZIP) {
/* ISIZE: input length modulo 2^32 */
uint32_t isize = s->total_in & ((1ULL<<32)-1);
uint32_t cksum = s->crc32;
prt_info("append gzip trailer crc32 %08x adler32 %08x, s->total_out %ld\n", s->crc32, s->adler32, s->total_out);
/* TODO hto32le */
/* TODO Edelsohn says unaligned load/store ok
if not crossing page boundary */
/* emits cksum most significant byte first.
NOTE(review): RFC 1952 stores CRC32 least significant byte
first -- presumably s->crc32 is held byte swapped; confirm */
k=0;
while (k++ < 4) {
nx_put_byte(s, (cksum & 0xFF000000) >> 24);
cksum = cksum << 8;
}
prt_info("s->total_out %ld k %d\n", s->total_out, k);
/* emits isize least significant byte first */
k=0;
while (k++ < 4) {
prt_info("%02x\n", isize & 0xFF);
nx_put_byte(s, isize & 0xFF);
isize = isize >> 8;
}
prt_info("s->total_out %ld\n", s->total_out);
return k;
}
else if (s->wrap == HEADER_ZLIB) {
uint32_t cksum = s->adler32;
prt_info("append zlib trailer crc32 %08x adler32 %08x, s->total_out %ld\n", s->crc32, s->adler32, s->total_out);
/* TODO hto32le */
/* emits the Adler-32 most significant byte first */
k=0;
while (k++ < 4) {
nx_put_byte(s, (cksum & 0xFF000000) >> 24);
cksum = cksum << 8;
}
return k;
}
else if (s->wrap == HEADER_RAW) {
prt_info("raw format, no trailer, crc32 %08x adler32 %08x, s->total_out %ld\n", s->crc32, s->adler32, s->total_out);
}
return 0;
}
/*
 * Overwrites the 4-byte LEN/NLEN field of a stored block whose header
 * was emitted earlier, once the real block length is known.
 *
 * s          stream; if the field does not fit in the user buffer the
 *            remaining bytes are written at the start of s->fifo_out
 * buf        location of the LEN/NLEN field in the user buffer
 * avail_out  bytes of user buffer available starting at buf
 * block_len  block length, must be below 0x10000
 *
 * Always returns 4, the size of the field.
 */
static int update_block_len(nx_streamp s, char *buf, uint32_t avail_out, uint32_t block_len)
{
	char staged[6];
	uint64_t len_nlen;
	const int direct = (avail_out > 5);
	char *dst = direct ? buf : staged;
	int i, spill;

	ASSERT(block_len < 0x10000);

	/* NLEN (one's complement of LEN) in the upper half, LEN in the lower */
	len_nlen = (uint64_t) block_len;
	len_nlen = ((~len_nlen << 16) | len_nlen) & ((1ULL<<32)-1); /* TODO check bi-endian */

	/* serialize low byte first, either in place or in to the staging buffer */
	for (i = 0; i < 4; i++) {
		dst[i] = (unsigned char)(len_nlen & 0xffULL);
		len_nlen >>= 8;
	}

	if (direct)
		return 4;

	/* copy back to the stream for as long as the user buffer has room */
	for (i = 0; i < 4 && avail_out > 0; i++, avail_out--)
		*buf++ = staged[i];

	/* whatever did not fit overflows in to fifo_out */
	for (spill = 0; i < 4; i++, spill++)
		*(s->fifo_out + spill) = staged[i];

	return 4;
}
/*
 * Writes a minimal 10-byte gzip member header (no file name, zero
 * timestamp) to buf and returns the number of bytes written.
 * buf must have room for 10 bytes.
 * Layout per https://tools.ietf.org/html/rfc1952
 */
int gzip_header_blank(char *buf)
{
	static const unsigned char blank_hdr[] = {
		0x1f,			/* ID1 */
		0x8b,			/* ID2 */
		0x08,			/* CM */
		0x00,			/* FLG */
		0x00, 0x00, 0x00, 0x00,	/* MTIME */
		0x04,			/* XFL 4=fastest */
		0x03,			/* OS UNIX */
	};
	int i;

	ASSERT(!!buf);

	for (i = 0; i < (int) sizeof(blank_hdr); i++)
		buf[i] = blank_hdr[i];

	return i;
}
/*
 * Reports how many bytes a DDE describes.
 *
 * d               direct or indirect DDE
 * indirect_count  out: the DDE's dde_count field; 0 means d is a
 *                 direct DDE
 * dde_byte_count  out: for a direct DDE its ddebc field, for an
 *                 indirect DDE the sum of ddebc over the listed DDEs
 *
 * Returns ERR_NX_OK on success, ERR_NX_EXCESSIVE_DDE when the list is
 * longer than nx_max_source_dde_count, or ERR_NX_SEGMENTED_DDL when a
 * list entry is itself indirect. On an error return *dde_byte_count is
 * left at 0.
 */
static retnx_t nx_get_dde_byte_count(nx_dde_t *d, uint32_t *indirect_count, uint32_t *dde_byte_count)
{
	u32 icount;
	u32 i;
	u32 total_dde_bytes = 0;
	nx_dde_t *dde_list;

	/* check the pointers before d is read for the first time */
	ASSERT(!!d && !!dde_byte_count && !!indirect_count);

	icount = getpnn(d, dde_count);
	*dde_byte_count = 0;
	*indirect_count = icount;

	if (icount == 0) {
		/* direct dde */
		*dde_byte_count = getp32(d, ddebc);
		/* TODO max byte count thresholding */
		return ERR_NX_OK;
	}

	/* In an indirect DDE, the DDEad is a pointer to
	   the first DDE of a contiguous set of DDEcount direct DDE(s) */
	dde_list = (nx_dde_t *) getp64(d, ddead); /* list base */

	if (icount > nx_max_source_dde_count)
		return ERR_NX_EXCESSIVE_DDE;

	for (i = 0; i < icount; i++) {
		total_dde_bytes += get32(dde_list[i], ddebc);
		/* list entries must be direct DDEs */
		if (getnn(dde_list[i], dde_count) != 0)
			return ERR_NX_SEGMENTED_DDL;
	}

	*dde_byte_count = total_dde_bytes;

	/* last dde may be partially full;
	   what happens when d->ddebc is so short it doesn't reach
	   last couple ddes? */
	if (total_dde_bytes != getp32(d, ddebc)) {
		printf("WARN: ddebc mismatch\n");
	}

	return ERR_NX_OK;
}
/*
 * Reads the source data described by a direct or indirect DDE in to
 * one contiguous, internally allocated buffer.
 *
 * d     DDE to read; its ddead must not be NULL
 * data  out: malloc()ed buffer holding the bytes; the caller must
 *       free() it. Set to NULL on any error.
 * size  out: number of bytes copied (the DDE's ddebc). 0 on any error.
 *
 * Returns ERR_NX_OK on success, an NX error code (>= 0) when the DDE
 * is malformed, or -ENOMEM (< 0) when the buffer cannot be allocated.
 */
static retnx_t nx_copy_dde_to_buffer(nx_dde_t *d, char **data, uint32_t *size)
{
	u32 indirect_count;
	u32 dde_byte_count;
	int actual_byte_count;
	int cc;
	char *tmp;

	ASSERT(!!d && !!data && !!size);
	ASSERT(!!((void *)getp64(d, ddead)));

	*data = NULL;
	*size = 0;

	if ((cc = nx_get_dde_byte_count(d, &indirect_count, &dde_byte_count)) != ERR_NX_OK)
		return cc;

	actual_byte_count = (int) getp32(d, ddebc);
	if (dde_byte_count < actual_byte_count)
		return ERR_NX_DDE_OVERFLOW;

	/* Allocate outside of ASSERT(): if the macro compiles to nothing
	   the allocation must still happen, and a failed allocation must
	   be reported instead of being dereferenced. */
	tmp = malloc(actual_byte_count); /* caller frees memory */
	if (tmp == NULL)
		return -ENOMEM;

	*data = tmp;
	*size = (uint32_t) actual_byte_count;

	if (indirect_count == 0) {
		/* direct dde */
		memcpy(tmp, (char *) getp64(d, ddead), actual_byte_count);
		/* TODO max byte count thresholding */
	}
	else {
		u32 i;
		u32 offset = 0;
		nx_dde_t *dde_list = (nx_dde_t *) getp64(d, ddead); /* list base */

		for (i = 0; i < indirect_count; i++) {
			char *buf;
			u32 per_dde_bytes;

			buf = (char *) get64(dde_list[i], ddead);
			per_dde_bytes = get32(dde_list[i], ddebc);
			/* the last DDE that is needed may be only partially used */
			memcpy(tmp+offset, buf, NX_MIN(per_dde_bytes, actual_byte_count));
			offset += per_dde_bytes;
			actual_byte_count -= (int) per_dde_bytes;
			if (actual_byte_count <= 0) break; /* all the valid data copied */
		}
	}
	return ERR_NX_OK;
}
/*
 * Scatters a contiguous buffer in to the memory described by a direct
 * or indirect DDE. The caller supplies all memory; nothing is
 * allocated here.
 *
 * d     destination DDE; its ddead must not be NULL
 * data  source bytes
 * size  number of bytes to copy
 *
 * Returns ERR_NX_OK on success, ERR_NX_TARGET_SPACE when size exceeds
 * the byte count of the DDE, or the error reported by
 * nx_get_dde_byte_count().
 */
static int nx_copy_buffer_to_dde(nx_dde_t *d, char *data, uint32_t size)
{
	u32 indirect_count;
	u32 dde_byte_count;
	nx_dde_t *dde_list;
	u32 offset = 0;
	int remaining;
	int idx;
	int cc;

	ASSERT(!!d && !!data);
	ASSERT(!!((void *)getp64(d, ddead)));

	cc = nx_get_dde_byte_count(d, &indirect_count, &dde_byte_count);
	if (cc != ERR_NX_OK)
		return cc;

	/* the target must be able to hold everything, direct or indirect */
	if (size > dde_byte_count)
		return ERR_NX_TARGET_SPACE;

	if (indirect_count == 0) {
		/* direct dde: a single destination region */
		memcpy((char *) getp64(d, ddead), data, size);
		return ERR_NX_OK;
	}

	/* indirect dde: fill the listed regions one after another */
	dde_list = (nx_dde_t *) getp64(d, ddead); /* list base */
	remaining = size;
	idx = 0;
	while (idx < indirect_count) {
		char *buf = (char *) get64(dde_list[idx], ddead);
		u32 per_dde_bytes = get32(dde_list[idx], ddebc);

		memcpy(buf, data+offset, NX_MIN(per_dde_bytes, remaining));
		offset += per_dde_bytes;
		remaining -= per_dde_bytes;
		if (remaining <= 0)
			break; /* all the valid data copied */
		idx++;
	}
	return ERR_NX_OK;
}
/*
 * Stores a 32-bit trailer word (CRC, ADLER or ISIZE) at buf in
 * little-endian byte order, least significant byte first.
 *
 * buf    destination with room for 4 bytes; no alignment is required
 * cksum  value to store
 *
 * Returns LIBNX_OK, or LIBNX_ERR_ARG when buf is NULL.
 */
static retlibnx_t nx_to_zstrm_trailer(void *buf, uint32_t cksum)
{
	unsigned char *b = buf;
	int i;

	if (buf == NULL)
		return LIBNX_ERR_ARG;

	/* Extracting bytes by shifting is already independent of the host
	   byte order. Converting with htole32() first, as was done before,
	   swapped the value on big-endian hosts and produced big-endian
	   output there. */
	for (i = 0; i < 4; i++)
		b[i] = (unsigned char)((cksum >> (i * 8)) & 0xFF);

	return LIBNX_OK;
}
/*
 * Puts an initialized deflate stream back in to its start-of-stream
 * state without releasing or reallocating any buffers: counters,
 * fifo positions, checksums, dictionary length and status are reset.
 *
 * Returns Z_OK, or Z_STREAM_ERROR when strm or its state is NULL.
 */
static int nx_deflateResetKeep(z_streamp strm)
{
	nx_streamp s;

	if (strm == Z_NULL || strm->state == Z_NULL)
		return Z_STREAM_ERROR;

	strm->total_in = strm->total_out = 0;
	strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */
	strm->data_type = Z_UNKNOWN;

	s = (nx_streamp) strm->state;
	s->total_in = s->total_out = 0;

	if (s->wrap < 0) {
		s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */
	}
	/* the initial status depends on the wrapper kind */
	if (s->wrap == 0) s->status = NX_RAW_INIT_ST;
	else if (s->wrap == 1) s->status = NX_ZLIB_INIT_ST;
	else if (s->wrap == 2) s->status = NX_GZIP_INIT_ST;

	/* Keep len_out in step with the size nx_deflateInit2_() used when
	   it allocated fifo_out: the configured length, but never less
	   than DEF_MAX_EXPANSION_LEN. Using the raw configured value here
	   would make len_out disagree with the allocation, and len_out is
	   the length later handed to nx_free_buffer(). */
	s->len_out = nx_config.deflate_fifo_out_len;
	s->len_out = NX_MAX(s->len_out, DEF_MAX_EXPANSION_LEN);

	if (s->strategy == Z_DEFAULT_STRATEGY && s->dhthandle == NULL)
		s->dhthandle = dht_begin(NULL, NULL);

	s->used_in = s->used_out = 0;
	s->cur_in = s->cur_out = 0;
	s->tebc = 0;
	s->is_final = 0;

	s->ddl_in = s->dde_in;
	s->ddl_out = s->dde_out;

	s->crc32 = INIT_CRC;
	s->adler32 = INIT_ADLER;
	s->need_stored_block = 0;
	s->dict_len = 0;

	/* expose the initial checksum of the selected wrapper to the user */
	if (s->wrap == 1) strm->adler = s->adler32;
	else if (s->wrap == 2) strm->adler = s->crc32;

	s->invoke_cnt = 0;

	return Z_OK;
}
/*
 * Resets a deflate stream so that it can compress a new data set.
 * Returns Z_STREAM_ERROR for a NULL strm, otherwise the result of
 * nx_deflateResetKeep().
 */
int nx_deflateReset(z_streamp strm)
{
	return (strm != Z_NULL) ? nx_deflateResetKeep(strm) : Z_STREAM_ERROR;
}
/*
 * Releases everything owned by a deflate stream: the DHT handle, the
 * input/output fifos, the dictionary buffer, the NX device handle and
 * the stream state itself. strm->state is cleared afterwards.
 *
 * Returns Z_STREAM_ERROR when strm or its state is NULL, Z_DATA_ERROR
 * when the stream status at the time of the call was exactly
 * NX_DEFLATE_ST, and Z_OK otherwise.
 */
int nx_deflateEnd(z_streamp strm)
{
	int status;
	nx_streamp s;

	if (strm == Z_NULL)
		return Z_STREAM_ERROR;

	s = (nx_streamp) strm->state;
	if (s == NULL)
		return Z_STREAM_ERROR;

	/* statistic*/
	zlib_stats_inc(&zlib_stats.deflateEnd);

	/* remember the status; s is gone by the time we return */
	status = s->status;
	/* TODO add here Z_DATA_ERROR if the stream was freed
	   prematurely (when some input or output was discarded). */

	dht_end(s->dhthandle);
	nx_free_buffer(s->fifo_in, s->len_in, 0);
	nx_free_buffer(s->fifo_out, s->len_out, 0);
	nx_free_buffer(s->dict, s->dict_alloc_len, 0);
	nx_close(s->nxdevp);

	nx_free_buffer(s, sizeof(*s), 0);
	/* do not leave a dangling pointer behind: a repeated
	   nx_deflateEnd() now fails with Z_STREAM_ERROR instead of
	   touching freed memory */
	strm->state = Z_NULL;

	/* FIXME check for correctness */
	return (status == NX_DEFLATE_ST) ? Z_DATA_ERROR : Z_OK;
}
/*
 * Basic initializer: calls nx_deflateInit2_() with the deflate method,
 * MAX_WBITS window bits, DEF_MEM_LEVEL and the default strategy, and
 * returns its result.
 */
int nx_deflateInit_(z_streamp strm, int level, const char* version, int stream_size)
{
	const int window_bits = MAX_WBITS;
	const int mem_level = DEF_MEM_LEVEL;

	return nx_deflateInit2_(strm, level, Z_DEFLATED, window_bits, mem_level,
				Z_DEFAULT_STRATEGY, version, stream_size);
}
/*
 * Initializes strm for deflate: opens an NX device, allocates the
 * stream state and the output fifo, then resets the stream.
 *
 * strm        stream to initialize
 * level       ignored; level 6 is always recorded
 * method      must be Z_DEFLATED
 * windowBits  15 selects the zlib wrapper, 31 the gzip wrapper and -15
 *             raw deflate; any other value is rejected
 * memLevel, version, stream_size  accepted for interface compatibility
 *             and not examined here
 * strategy    Z_FIXED or Z_DEFAULT_STRATEGY; forced to Z_FIXED when
 *             nx_strategy_override is 0
 *
 * Returns the result of nx_deflateReset() on success, Z_STREAM_ERROR
 * for a NULL strm, unsupported arguments or when no NX device can be
 * opened, and Z_MEM_ERROR when an allocation fails. Resources acquired
 * before a failure are released again.
 */
int nx_deflateInit2_(z_streamp strm, int level, int method, int windowBits,
		     int memLevel, int strategy, const char *version,
		     int stream_size)
{
	int rc;
	int wrap;
	nx_streamp s;
	nx_devp_t h;

	nx_hw_init();

	if (strm == Z_NULL) return Z_STREAM_ERROR;

	/* statistic */
	zlib_stats_inc(&zlib_stats.deflateInit);

	strm->msg = Z_NULL;
	strm->total_in = 0;
	strm->total_out = 0;

	/* NX can only do a window size of 15 (32KB). Other window
	   sizes may be simulated by making full flush blocks with the
	   same size as the window size */
	if (windowBits != 15 && windowBits != 31 && windowBits != -15) {
		prt_info("NX does not support less than 2^15 byte window size: %d\n", windowBits);
		/* TODO should I ignore small window request? */
		return Z_STREAM_ERROR;
	}

	if (windowBits < 0) { /* suppress zlib wrapper */
		wrap = HEADER_RAW;
		windowBits = -windowBits;
	}
	else if (windowBits > 15) {
		wrap = HEADER_GZIP; /* write gzip wrapper instead */
		windowBits -= 16;
	}
	else wrap = HEADER_ZLIB;

	prt_info(" windowBits %d wrap %d \n", windowBits, wrap);
	if (method != Z_DEFLATED || (strategy != Z_FIXED && strategy != Z_DEFAULT_STRATEGY)) {
		prt_err("unsupported zlib method or strategy\n");
		return Z_STREAM_ERROR;
	}

	h = nx_open(-1); /* TODO allow picking specific NX device */
	if (!h) {
		prt_err("cannot open NX device\n");
		return Z_STREAM_ERROR;
	}

	/* only support level 6 here */
	level = 6;

	s = nx_alloc_buffer(sizeof(*s), nx_config.page_sz, 0);
	if (s == NULL) {
		/* do not leak the device handle */
		nx_close(h);
		return Z_MEM_ERROR;
	}
	memset(s, 0, sizeof(*s));

	s->nxcmdp = &s->nxcmd0;
	s->wrap = wrap;
	s->windowBits = windowBits;
	s->level = level;
	s->method = method;

	s->strategy = strategy;
	if (s->strategy == Z_FIXED || nx_strategy_override == 0)
		s->strategy = Z_FIXED;
	else
		s->strategy = Z_DEFAULT_STRATEGY;

	s->zstrm = strm; /* pointer to parent */
	s->page_sz = nx_config.page_sz;
	s->nxdevp = h;
	s->gzhead = NULL;

	s->fifo_in = NULL;
	s->len_in = 0;
	s->dict = NULL;
	s->dict_len = 0;

	/* the output fifo is never smaller than DEF_MAX_EXPANSION_LEN */
	s->len_out = nx_config.deflate_fifo_out_len;
	s->len_out = NX_MAX(s->len_out, DEF_MAX_EXPANSION_LEN);
	s->fifo_out = nx_alloc_buffer(s->len_out, nx_config.page_sz, 0);
	if (s->fifo_out == NULL) {
		/* undo what was acquired so far */
		nx_free_buffer(s, sizeof(*s), 0);
		nx_close(h);
		return Z_MEM_ERROR;
	}

	if (s->strategy == Z_DEFAULT_STRATEGY && s->dhthandle == NULL)
		s->dhthandle = dht_begin(NULL, NULL);

	s->used_in = s->used_out = 0;
	s->cur_in = s->cur_out = 0;
	s->tebc = 0;

	s->ddl_in = s->dde_in;
	s->ddl_out = s->dde_out;

	strm->state = (void *) s; /* remember the hardware state */
	rc = nx_deflateReset(strm);

	return rc;
}
/*
 * Drains pending output from fifo_out in to the user buffer next_out.
 *
 * Copies as many of the used_out pending bytes as fit in avail_out,
 * advances both s and s->zstrm, and compacts the fifo. When the fifo
 * has been emptied while the stream ends on a partial byte
 * (s->tebc > 0) and neither the BFINAL nor the TRAILER status bit is
 * set, a sync flush is appended to byte align the tail.
 *
 * Returns LIBNX_OK_NO_AVOUT when there is nothing to copy or no room
 * to copy in to, LIBNX_OK otherwise.
 */
static int nx_copy_fifo_out_to_nxstrm_out(nx_streamp s)
{
	uint32_t nbytes;

	if (s->used_out == 0 || s->avail_out == 0)
		return LIBNX_OK_NO_AVOUT;

	/* do not copy more than the available user buffer */
	nbytes = NX_MIN(s->used_out, s->avail_out);
	memcpy(s->next_out, s->fifo_out + s->cur_out, nbytes);

	update_stream_out(s, nbytes);
	update_stream_out(s->zstrm, nbytes);

	s->used_out -= nbytes;
	s->cur_out += nbytes;
	fifo_out_len_check(s);

	/* nothing more to do unless the fifo was drained completely while
	   a non-final stream ends in the middle of a byte */
	if (!(s->tebc > 0) || s->used_out != 0 ||
	    (s->status & (NX_BFINAL_ST|NX_TRAILER_ST)))
		return LIBNX_OK;

	/* byte align the tail when fifo_out is copied entirely to next_out */
	ASSERT(s->cur_out == 0);
	prt_info("%s tebc %d\n", __FUNCTION__, s->tebc);
	append_spanning_flush(s, Z_SYNC_FLUSH, s->tebc, 0);

	return LIBNX_OK;
}
/*
from zlib.h: deflate() sets strm->adler to the Adler-32 checksum of
all input read so far (that is, total_in bytes). If a gzip stream
is being generated, then strm->adler will be the CRC-32 checksum of
the input read so far. (See deflateInit2 below.)
This adds a wrinkle to our buffering approach: if we accumulate
input data in fifo_in, how do we compute the crc?
*/
/*
from zlib.h The application must update next_in and avail_in when
avail_in has dropped to zero. It must update next_out and
avail_out when avail_out has dropped to zero.
- Compress more input starting at next_in and update next_in and avail_in
accordingly. If not all input can be processed (because there is not
enough room in the output buffer), next_in and avail_in are updated and
processing will resume at this point for the next call of deflate().
- Generate more output starting at next_out and update next_out and avail_out
accordingly. This action is forced if the parameter flush is non zero.
Forcing flush frequently degrades the compression ratio, so this parameter
should be set only when necessary. Some output may be provided even if
flush is zero.
*/
/*
 * When only a small number of bytes are in strm, copy them to fifo_in
 * with memcpy instead of using NX to DMA them.
 *
 * At most avail_in bytes are copied, limited to the room left below the
 * half-way mark of fifo_in. Both s and s->zstrm are advanced by the
 * number of bytes copied and s->used_in grows by the same amount.
 * Nothing is returned.
 *
 * NOTE(review): the room is computed as len_in/2 - cur_in - used_in in
 * unsigned arithmetic -- presumably callers guarantee that
 * cur_in + used_in does not exceed len_in/2; confirm.
 */
static inline void small_copy_nxstrm_in_to_fifo_in(nx_streamp s)
{
	uint32_t room, nbytes;

	room = s->len_in/2 - s->cur_in - s->used_in;
	nbytes = NX_MIN(room, s->avail_in);

	/* append right behind the data already waiting in the fifo */
	memcpy(s->fifo_in + s->cur_in + s->used_in, s->next_in, nbytes);

	update_stream_in(s, nbytes);
	update_stream_in(s->zstrm, nbytes);
	s->used_in += nbytes;
}
/* unused */
static int nx_copy_nxstrm_in_to_fifo_in(nx_streamp s)
{
int rc;
uint32_t ask, len;
uint32_t first_bytes, last_bytes, copy_bytes;
ask = len = s->avail_in;
first_bytes = fifo_free_first_bytes(s->cur_in,
s->used_in,
s->len_in);
last_bytes = fifo_free_last_bytes(s->cur_in,
s->used_in,
s->len_in);
copy_bytes = NX_MIN(first_bytes, len);
if (copy_bytes > 0) {
rc = nx_copy(s->fifo_in + s->cur_in,
s->next_in,
copy_bytes, NULL, NULL,
s->nxdevp);
if (rc != LIBNX_OK ) {
memcpy(s->fifo_in + s->cur_in, s->next_in, copy_bytes);
prt_err("nx_copy failed\n");
}
s->used_in += copy_bytes;
len = len - copy_bytes;