/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Originated as patches against Apache HTTPD 2.2 mod_disk_cache but quickly
* diverged enough to be deemed not applicable for merging upstream.
* Eventually broken out as the stand-alone module mod_cache_disk_largefile
* in 2012.
*/
#include "apr_lib.h"
#include "apr_file_io.h"
#include "apr_strings.h"
#include "mod_cache.h"
#include "mod_cache_disk_largefile.h"
#include "http_config.h"
#include "http_log.h"
#include "http_core.h"
#include "ap_provider.h"
#include "util_filter.h"
#include "util_script.h"
#include "util_charset.h"
#include "ap_mpm.h"
#include "mpm_common.h"
#include "apr_portable.h"
#include "http_main.h"
#include "http_request.h"
#if !APR_HAS_THREADS
#error This module requires thread support
#endif /* !APR_HAS_THREADS */
/*
* mod_cache_disk_largefile: Disk-based HTTP/1.1 cache.
*
* Flow to Find the right cache file:
* Incoming client requests a URL
* Generate <hash>.header from URL
* Open <hash>.header
* Read in <hash>.header file format identifier, which might be:
* VARY_FORMAT_VERSION - Vary headers
* DISK_FORMAT_VERSION - Metadata and headers for a cached file
* Anything else - Unknown header format, remove and return.
*
* If VARY_FORMAT_VERSION (Contains a list of Vary Headers):
* Use each header name with our request values (headers_in) to
* regenerate <hash>.header using HeaderName+HeaderValue+URL,
* open it, read format (must be DISK_FORMAT_VERSION).
*
* VARY_FORMAT_VERSION:
* apr_uint32_t format;
* apr_time_t expire;
* apr_array_t vary_headers (delimited by CRLF)
*
* DISK_FORMAT_VERSION:
* disk_cache_info_t
* entity name (dobj->name) [length is in disk_cache_info_t->name_len]
* bodyfile (dobj->bodyfile) [length is in disk_cache_info_t->bodyname_len]
* optional filename (r->filename)
* [length is in disk_cache_info_t->filename_len]
* r->headers_out (see on disk header format below)
* r->headers_in
*
* On disk headers are stored in the following format:
* apr_uint32_t totsize; - size of headers to follow
* totsize bytes of headers, HeaderA\0ValueA\0...HeaderN\0ValueN\0
*/
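/* Illustration (values are made up, not from a real cache entry): a
 * response with "Content-Type: text/html" and "Cache-Control: max-age=60"
 * would be serialized as totsize = 48 followed by the 48-byte blob
 *   Content-Type\0text/html\0Cache-Control\0max-age=60\0
 * A minimal read sketch, assuming a hypothetical file handle fd positioned
 * at totsize and a pool p:
 *
 *   apr_uint32_t totsize;
 *   char *blob, *key, *val;
 *   apr_file_read_full(fd, &totsize, sizeof(totsize), NULL);
 *   blob = apr_palloc(p, totsize);
 *   apr_file_read_full(fd, blob, totsize, NULL);
 *   for (key = blob; key < blob + totsize; key = val + strlen(val) + 1) {
 *       val = key + strlen(key) + 1;   (each value follows its NUL-ended name)
 *   }
 */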
module AP_MODULE_DECLARE_DATA cache_disk_largefile_module;
/* Emulate RCS $Id$, simply because it's handy to be able to run ident
on an executable/library/etc and see the version.
*/
static const char rcsid[] = "$Id: " __FILE__ " " GIT_SOURCE_DESC " $";
/* Forward declarations */
static int remove_entity(cache_handle_t *h);
static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *i);
static apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *in,
apr_bucket_brigade *out);
static apr_status_t recall_headers(cache_handle_t *h, request_rec *r);
static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb);
static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
apr_file_t *file);
/*
* Modified file bucket implementation to be able to deliver files
* while caching.
*/
/* Derived from apr_buckets_file.c */
#define BUCKET_IS_DISKCACHE(e) ((e)->type == &bucket_type_diskcache)
static const apr_bucket_type_t bucket_type_diskcache;
static void diskcache_bucket_destroy(void *data)
{
diskcache_bucket_data *f = data;
if (apr_bucket_shared_destroy(f)) {
/* no need to close files here; it will get
* done automatically when the pool gets cleaned up */
apr_bucket_free(f);
}
}
/* The idea here is to convert diskcache buckets to regular file buckets
as data becomes available. We want to keep being called as long as the
backing file isn't complete, so don't return overly large chunks or we
won't be called again for a long time when slow connections are involved.
That would in turn prevent us from returning the entire file as a file
bucket, which is the requirement for event MPM async write completion to
kick in. */
static apr_status_t diskcache_bucket_read(apr_bucket *e, const char **str,
apr_size_t *len,
apr_read_type_e block)
{
diskcache_bucket_data *a = e->data;
apr_file_t *f = a->fd;
apr_bucket *b = NULL;
char *buf;
apr_status_t rv;
apr_finfo_t finfo;
apr_size_t filelength = e->length; /* bytes remaining in file past offset */
apr_off_t fileoffset = e->start;
apr_size_t available;
apr_time_t start = apr_time_now();
apr_interval_time_t loopdelay = CACHE_LOOP_MINSLEEP;
#if APR_HAS_THREADS && !APR_HAS_XTHREAD_FILES
apr_int32_t flags;
#endif
#if APR_HAS_THREADS && !APR_HAS_XTHREAD_FILES
if ((flags = apr_file_flags_get(f)) & APR_FOPEN_XTHREAD) {
/* this file descriptor is shared across multiple threads and
* this OS doesn't support that natively, so as a workaround
* we must reopen the file into a->readpool */
const char *fname;
apr_file_name_get(&fname, f);
rv = apr_file_open(&f, fname, (flags & ~APR_FOPEN_XTHREAD), 0,
a->readpool);
if (rv != APR_SUCCESS)
return rv;
a->fd = f;
}
#endif
/* in case we die prematurely */
*str = NULL;
*len = 0;
if (APLOGtrace4(ap_server_conf)) {
ap_log_error(APLOG_MARK, APLOG_TRACE4, 0, ap_server_conf,
"Called diskcache_bucket_read. block: %d fd: %pp "
"fd pool: %pp readpool: %pp "
"lastdata: %" APR_TIME_T_FMT ".%06" APR_TIME_T_FMT,
block, f, apr_file_pool_get(f), a->readpool,
apr_time_sec(a->lastdata), apr_time_usec(a->lastdata));
}
while(1) {
/* Figure out how big the file is right now, sit here until
it's grown enough or we get bored */
rv = apr_file_info_get(&finfo,
APR_FINFO_SIZE | APR_FINFO_MTIME | APR_FINFO_NLINK, f);
if(rv != APR_SUCCESS) {
return rv;
}
/* The filesize has to be beyond our current offset or we'll end
up with fun negative numbers in the wrong place ;) */
if(finfo.size > fileoffset) {
available = S_MIN(filelength, finfo.size-fileoffset);
}
else {
available = 0;
}
/* Always be content if we have the complete backing file */
if(available >= filelength) {
break;
}
/* No use even waiting for a deleted file */
if(finfo.nlink == 0) {
return APR_EGENERAL;
}
/* Non-blocking reads can retry */
if(block == APR_NONBLOCK_READ) {
return APR_EAGAIN;
}
/* Blocking, i.e. urgent, reads get MAXCHUNK if available */
if(available >= CACHE_BUCKET_MAXCHUNK) {
available = CACHE_BUCKET_MAXCHUNK;
break;
}
/* Otherwise deliver what's there if larger than MINCHUNK after
PREFERWAIT microseconds since the last time we returned data */
if(available >= CACHE_BUCKET_MINCHUNK
&&
a->lastdata + CACHE_BUCKET_PREFERWAIT_BLOCK < apr_time_now())
{
break;
}
/* Check for timeout */
if(finfo.mtime < (apr_time_now() - a->updtimeout) ) {
return APR_EGENERAL;
}
/* If we have progress within half the timeout period, return what
we have so far */
if(available > 0 &&
start < (apr_time_now() - a->updtimeout/2) )
{
break;
}
apr_sleep(loopdelay);
loopdelay <<= 1;
if(loopdelay > CACHE_LOOP_MAXSLEEP) {
loopdelay = CACHE_LOOP_MAXSLEEP;
}
}
/* Convert this bucket to a zero-length heap bucket so we won't be called
again */
buf = apr_bucket_alloc(0, e->list);
apr_bucket_heap_make(e, buf, 0, apr_bucket_free);
/* Wrap available data into a regular file bucket */
/* FIXME: This doesn't cater for CACHE_BUCKET_MAX, i.e. 32-bit platforms,
which might need splitting into multiple buckets. */
b = apr_bucket_file_create(f, fileoffset, available, a->readpool, e->list);
APR_BUCKET_INSERT_AFTER(e, b);
/* Record that we returned data */
a->lastdata = apr_time_now();
if (APLOGtrace4(ap_server_conf)) {
ap_log_error(APLOG_MARK, APLOG_TRACE4, 0, ap_server_conf,
"diskcache_bucket_read: Converted to regular file"
" off %" APR_OFF_T_FMT " len %" APR_SIZE_T_FMT,
fileoffset, available);
}
/* Put any remains in yet another bucket */
if(available < filelength) {
e=b;
/* for efficiency, we can just build a new apr_bucket struct
* to wrap around the existing bucket */
b = apr_bucket_alloc(sizeof(*b), e->list);
b->start = fileoffset + available;
b->length = filelength - available;
b->data = a;
b->type = &bucket_type_diskcache;
b->free = apr_bucket_free;
b->list = e->list;
APR_BUCKET_INSERT_AFTER(e, b);
}
else {
diskcache_bucket_destroy(a);
if (APLOGtrace4(ap_server_conf)) {
ap_log_error(APLOG_MARK, APLOG_TRACE4, 0, ap_server_conf,
"diskcache_bucket_read: done with this bucket.");
}
}
*str = buf;
return APR_SUCCESS;
}
static apr_bucket * diskcache_bucket_make(apr_bucket *b,
apr_file_t *fd,
apr_off_t offset,
apr_size_t len,
apr_interval_time_t timeout,
apr_pool_t *p)
{
diskcache_bucket_data *f;
f = apr_bucket_alloc(sizeof(*f), b->list);
f->fd = fd;
f->readpool = p;
f->updtimeout = timeout;
f->lastdata = 0;
b = apr_bucket_shared_make(b, f, offset, len);
b->type = &bucket_type_diskcache;
return b;
}
static apr_bucket * diskcache_bucket_create(apr_file_t *fd,
apr_off_t offset,
apr_size_t len,
apr_interval_time_t timeout,
apr_pool_t *p,
apr_bucket_alloc_t *list)
{
apr_bucket *b = apr_bucket_alloc(sizeof(*b), list);
APR_BUCKET_INIT(b);
b->free = apr_bucket_free;
b->list = list;
return diskcache_bucket_make(b, fd, offset, len, timeout, p);
}
/* FIXME: This is probably only correct for the first case, which seems
to be the one that occurs all the time... */
static apr_status_t diskcache_bucket_setaside(apr_bucket *data,
apr_pool_t *reqpool)
{
diskcache_bucket_data *a = data->data;
apr_file_t *fd = NULL;
apr_file_t *f = a->fd;
apr_pool_t *curpool = apr_file_pool_get(f);
if (apr_pool_is_ancestor(curpool, reqpool)) {
return APR_SUCCESS;
}
if (!apr_pool_is_ancestor(a->readpool, reqpool)) {
/* FIXME: Figure out what needs to be done here */
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
"diskcache_bucket_setaside: FIXME1");
a->readpool = reqpool;
}
/* FIXME: Figure out what needs to be done here */
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
"diskcache_bucket_setaside: FIXME2");
apr_file_setaside(&fd, f, reqpool);
a->fd = fd;
return APR_SUCCESS;
}
static const apr_bucket_type_t bucket_type_diskcache = {
"DISKCACHE", 5, APR_BUCKET_DATA,
diskcache_bucket_destroy,
diskcache_bucket_read,
diskcache_bucket_setaside,
apr_bucket_shared_split,
apr_bucket_shared_copy
};
static apr_bucket * diskcache_brigade_insert(apr_bucket_brigade *bb,
apr_file_t *f, apr_off_t
start, apr_off_t length,
apr_interval_time_t timeout,
apr_pool_t *p)
{
apr_bucket *e;
if (length < CACHE_BUCKET_MAX) {
e = diskcache_bucket_create(f, start, (apr_size_t)length, timeout, p,
bb->bucket_alloc);
}
else {
/* Several buckets are needed. */
e = diskcache_bucket_create(f, start, CACHE_BUCKET_MAX, timeout, p,
bb->bucket_alloc);
while (length > CACHE_BUCKET_MAX) {
apr_bucket *ce;
apr_bucket_copy(e, &ce);
APR_BRIGADE_INSERT_TAIL(bb, ce);
e->start += CACHE_BUCKET_MAX;
length -= CACHE_BUCKET_MAX;
}
e->length = (apr_size_t)length; /* Resize just the last bucket */
}
APR_BRIGADE_INSERT_TAIL(bb, e);
return e;
}
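/* Usage sketch (hypothetical caller; the dobj field names here are
 * illustrative): hand a possibly still-growing cached body to the client
 * and let diskcache_bucket_read convert it to plain file buckets as the
 * writer catches up:
 *
 *   diskcache_brigade_insert(bb, dobj->bfd_read, 0, dobj->file_size,
 *                            dobj->updtimeout, r->pool);
 *   APR_BRIGADE_INSERT_TAIL(bb, apr_bucket_eos_create(bb->bucket_alloc));
 */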
/* --------------------------------------------------------------- */
/*
* Local static functions
*/
static char *cache_file(apr_pool_t *p, disk_cache_conf *conf,
const char *prefix, const char *name,
const char *suffix)
{
char *hashfile;
hashfile = ap_cache_generate_name(p, DEFAULT_DIRLEVELS, DEFAULT_DIRLENGTH,
name);
/* This assumes that we always deal with Vary-stuff if there's a prefix */
if (prefix) {
return apr_pstrcat(p, prefix, CACHE_VDIR_SUFFIX, "/",
hashfile, suffix, NULL);
}
else {
return apr_pstrcat(p, conf->cache_root, "/", hashfile, suffix, NULL);
}
}
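/* Example (hash value is made up): with conf->cache_root "/var/cache/httpd"
 * and no prefix, cache_file(p, conf, NULL, key, CACHE_HEADER_SUFFIX) yields
 * something like
 *   /var/cache/httpd/xQ/Zd/k0SAXSVhZCJvLzQXkg.header
 * With a Vary prefix, the variant is instead rooted at
 * prefix + CACHE_VDIR_SUFFIX.
 */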
static apr_status_t mkdir_structure(const char *file, apr_pool_t *pool)
{
apr_status_t rv;
char *p;
int i;
p = strrchr((char *)file, '/');
if(!p) {
return APR_EGENERAL;
}
*p = '\0';
/* Be stubborn to overcome raciness when others delete directories
while we're trying to create them */
for(i=0; i < 10; i++) {
rv = apr_dir_make_recursive(file,
APR_UREAD|APR_UWRITE|APR_UEXECUTE, pool);
if(rv == APR_SUCCESS) {
break;
}
}
*p = '/';
return rv;
}
/* htcacheclean may remove directories underneath us.
* So, we'll try renaming three times at a cost of 0.002 seconds.
*/
static apr_status_t safe_file_rename(const char *src, const char *dest,
apr_pool_t *pool)
{
apr_status_t rv;
rv = apr_file_rename(src, dest, pool);
if (rv != APR_SUCCESS) {
int i;
for (i = 0; i < 2 && rv != APR_SUCCESS; i++) {
rv = mkdir_structure(dest, pool);
if (rv != APR_SUCCESS)
continue;
rv = apr_file_rename(src, dest, pool);
if(rv != APR_SUCCESS) {
/* 1000 micro-seconds aka 0.001 seconds. */
apr_sleep(1000);
}
}
}
return rv;
}
/* dobj->errcleanflags determines if non-temporary files get deleted */
static apr_status_t file_cache_errorcleanup(disk_cache_object_t *dobj,
request_rec *r)
{
apr_status_t rc;
if(dobj->hfd) {
apr_file_close(dobj->hfd);
dobj->hfd = NULL;
}
if( (dobj->errcleanflags & ERRCLEAN_HEADER && dobj->hdrsfile) ) {
rc = apr_file_remove(dobj->hdrsfile, r->pool);
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rc, r,
"file_cache_errorcleanup: Removed %s", dobj->hdrsfile);
/* Clear flag so we don't try again if called twice */
dobj->errcleanflags &= ~ERRCLEAN_HEADER;
}
if(dobj->bfd_read) {
apr_file_close(dobj->bfd_read);
dobj->bfd_read = NULL;
}
if(dobj->bfd_write) {
apr_file_close(dobj->bfd_write);
dobj->bfd_write = NULL;
}
if( (dobj->errcleanflags & ERRCLEAN_BODY && dobj->bodyfile) ) {
rc = apr_file_remove(dobj->bodyfile, r->pool);
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rc, r,
"file_cache_errorcleanup: Removed %s", dobj->bodyfile);
/* Clear flag so we don't try again if called twice */
dobj->errcleanflags &= ~ERRCLEAN_BODY;
}
if (dobj->tfd) {
apr_file_close(dobj->tfd);
dobj->tfd = NULL;
rc = apr_file_remove(dobj->tempfile, r->pool);
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rc, r,
"file_cache_errorcleanup: Removed %s", dobj->tempfile);
}
return APR_SUCCESS;
}
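/* Typical invocation on a broken entry (this mirrors the call sites
 * further down): flag which permanent files are invalid, then clean up.
 * The flags are cleared again, so a second call is harmless:
 *
 *   dobj->errcleanflags |= ERRCLEAN_HEADER;
 *   file_cache_errorcleanup(dobj, r);
 */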
static void debug_rlog_brigade(const char *file, int line, int module_index,
int level, apr_status_t status,
const request_rec *r, apr_bucket_brigade *bb,
const char *bbname)
{
apr_bucket *db;
char *btype;
apr_file_t *fd;
int i = 0;
for (db = APR_BRIGADE_FIRST(bb);
db != APR_BRIGADE_SENTINEL(bb);
db = APR_BUCKET_NEXT(db), i++)
{
btype = "UNKNOWN";
fd = NULL;
if(BUCKET_IS_DISKCACHE(db)) {
diskcache_bucket_data *a = db->data;
fd = a->fd;
btype = "DISKCACHE";
}
else if(APR_BUCKET_IS_FILE(db)) {
apr_bucket_file *a = db->data;
fd = a->fd;
btype = "FILE";
}
else if(APR_BUCKET_IS_HEAP(db)) {
btype = "HEAP";
}
else if(APR_BUCKET_IS_EOS(db)) {
btype = "EOS";
}
else if(APR_BUCKET_IS_FLUSH(db)) {
btype = "FLUSH";
}
else if(APR_BUCKET_IS_METADATA(db)) {
btype = "METADATA";
}
else if(AP_BUCKET_IS_EOR(db)) {
btype = "EOR";
}
else if(APR_BUCKET_IS_MMAP(db)) {
btype = "MMAP";
}
else if(APR_BUCKET_IS_PIPE(db)) {
btype = "PIPE";
}
else if(APR_BUCKET_IS_SOCKET(db)) {
btype = "SOCKET";
}
else if(APR_BUCKET_IS_TRANSIENT(db)) {
btype = "TRANSIENT";
}
else if(APR_BUCKET_IS_IMMORTAL(db)) {
btype = "IMMORTAL";
}
else if(APR_BUCKET_IS_POOL(db)) {
btype = "POOL";
}
if(fd) {
const char *fname=NULL;
apr_file_name_get(&fname, fd);
ap_log_rerror(file, line, module_index, level, status, r,
"%s bucket %d: type %s length %" APR_OFF_T_FMT " "
"offset %" APR_OFF_T_FMT " "
"fd %pp fdpool %pp fdname %s",
bbname, i, btype, db->length, db->start,
fd, apr_file_pool_get(fd), fname);
}
else {
ap_log_rerror(file, line, module_index, level, status, r,
"%s bucket %d: type %s length %" APR_OFF_T_FMT " "
"offset %" APR_OFF_T_FMT " ",
bbname, i, btype, db->length, db->start);
}
}
}
static const char* regen_key(apr_pool_t *p, apr_table_t *headers,
apr_array_header_t *varray, const char *oldkey)
{
struct iovec *iov;
int i, k;
int nvec;
const char *header;
const char **elts;
nvec = (varray->nelts * 2) + 1;
iov = apr_palloc(p, sizeof(struct iovec) * nvec);
elts = (const char **) varray->elts;
/* TODO:
* - Handle multiple-value headers better. (sort them?)
* - Handle case-insensitive values better.
* This isn't the end of the world, since it just lowers the cache
* hit rate, but it would be nice to fix.
*
* The majority are case-insensitive if they are values (encoding etc.).
* Most of RFC 2616 is case-insensitive on header contents.
*
* So the better solution may be to identify headers which should be
* treated as case-sensitive?
* HTTP URIs (3.2.3) [host and scheme are insensitive]
* HTTP method (5.1.1)
* HTTP-date values (3.3.1)
* 3.7 Media Types [excerpt]
* The type, subtype, and parameter attribute names are case-
* insensitive. Parameter values might or might not be case-sensitive,
* depending on the semantics of the parameter name.
* 14.20 Expect [excerpt]
* Comparison of expectation values is case-insensitive for unquoted
* tokens (including the 100-continue token), and is case-sensitive for
* quoted-string expectation-extensions.
*/
for(i=0, k=0; i < varray->nelts; i++) {
header = apr_table_get(headers, elts[i]);
if (!header) {
header = "";
}
iov[k].iov_base = (char*) elts[i];
iov[k].iov_len = strlen(elts[i]);
k++;
iov[k].iov_base = (char*) header;
iov[k].iov_len = strlen(header);
k++;
}
iov[k].iov_base = (char*) oldkey;
iov[k].iov_len = strlen(oldkey);
k++;
return apr_pstrcatv(p, iov, k, NULL);
}
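/* Worked example (hypothetical request): with varray = {"Accept-Encoding"},
 * a request header "Accept-Encoding: gzip" and oldkey "example.com/big.iso?",
 * regen_key() returns the plain concatenation
 *   Accept-Encodinggzipexample.com/big.iso?
 * i.e. name+value pairs followed by the old key, with no separators.
 */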
static int array_alphasort(const void *fn1, const void *fn2)
{
return strcmp(*(char**)fn1, *(char**)fn2);
}
static void tokens_to_array(apr_pool_t *p, const char *data,
apr_array_header_t *arr)
{
char *token;
while ((token = ap_get_list_item(p, &data)) != NULL) {
*((const char **) apr_array_push(arr)) = token;
}
/* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
qsort((void *) arr->elts, arr->nelts,
sizeof(char *), array_alphasort);
}
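/* Example: "Vary: Host, Accept-Encoding" and "Vary: Accept-Encoding, Host"
 * produce the same array. A short sketch, with p being any suitable pool:
 *
 *   apr_array_header_t *arr = apr_array_make(p, 2, sizeof(char *));
 *   tokens_to_array(p, "Host, Accept-Encoding", arr);
 *   (arr now holds {"Accept-Encoding", "Host"})
 */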
/*
* Hook and mod_cache callback functions
*/
static int create_entity(cache_handle_t *h, request_rec *r, const char *key,
apr_off_t len, apr_bucket_brigade *bb)
{
disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
&cache_disk_largefile_module);
cache_object_t *obj;
disk_cache_object_t *dobj;
apr_status_t rv;
if (conf->cache_root == NULL) {
return DECLINED;
}
/* we don't support caching of range requests (yet) */
if (r->status == HTTP_PARTIAL_CONTENT) {
if (APLOGrtrace1(r)) {
ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
"create_entity: partial content response not cached"
" URL %s", key);
}
return DECLINED;
}
/* Just ignore subrequests. They usually originate from content
generators like mod_autoindex anyway */
if (r->main != NULL) {
if (APLOGrtrace1(r)) {
ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
"create_entity: not cached, subrequest URL %s", key);
}
return DECLINED;
}
/* Don't allow HEAD requests to trigger any caching operations. The
reason for this is the large-file nature of this caching module,
triggering the caching of a huge file just to satisfy a HEAD request
is potentially resource-intensive.
We could compromise and just cache a header, but then we get into
trouble if we want to revalidate it into a complete cached item within
the update timeout. */
/* FIXME: Make this configurable? */
if(r->header_only) {
if (APLOGrtrace1(r)) {
ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
"create_entity: not cached, header_only request"
" URL %s", key);
}
return DECLINED;
}
if (APLOGrtrace2(r)) {
ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r,
"create_entity called. URL: %s r->filename: %s "
"finfo.filetype: %d "
"finfo.valid: %x "
"finfo.protection: %x "
"finfo.fname: %s "
"len: %" APR_OFF_T_FMT " "
"r->main: %pp "
"pools: r: %pp conn: %pp bb->p: %pp",
key, r->filename, r->finfo.filetype, r->finfo.valid,
r->finfo.protection,
r->finfo.fname?r->finfo.fname:"NULL", len, r->main,
r->pool, r->connection->pool, bb->p);
debug_rlog_brigade(APLOG_MARK, APLOG_TRACE2, 0, r, bb,
"create_entity bb");
}
/* Would really like to avoid caching of objects without
last-modified, but that doesn't seem to be available until
store_headers, from which we can't return DECLINED ...
So let's settle for only handling objects that stem from
files/directories for now.
*/
/* FIXME: It would make sense to make this configurable */
if(r->finfo.filetype != APR_REG && r->finfo.filetype != APR_DIR) {
if (APLOGrtrace1(r)) {
ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
"create_entity: not cached, not file/directory "
"(filetype: %d) URL %s",
r->finfo.filetype, key);
}
return DECLINED;
}
/* Allocate and initialize cache_object_t and disk_cache_object_t */
h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(*obj));
obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(*dobj));
obj->key = apr_pstrdup(r->pool, key);
rv = apr_pool_create(&(dobj->tpool), r->pool);
if(rv != APR_SUCCESS) {
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv, r,
"Unable to create temporary pool");
return DECLINED;
}
apr_pool_tag(dobj->tpool, "mod_cache_disk_largefile (create_entity)");
dobj->tbuf = NULL;
dobj->name = obj->key;
/* Save the cache root */
dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
dobj->root_len = conf->cache_root_len;
dobj->hdrsfile = cache_file(r->pool, conf, NULL, key, CACHE_HEADER_SUFFIX);
dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
dobj->initial_size = len;
dobj->file_size = -1;
dobj->lastmod = APR_DATE_BAD;
dobj->header_only = r->header_only;
dobj->bytes_sent = 0;
/* As of httpd 2.4 r->filename and r->finfo always seem to be set,
faked together from URL and document-root if there is no backing file!
r->finfo.filetype seems to be correct when it's a real file though...
*/
if(r->filename != NULL && strlen(r->filename) > 0) {
dobj->filename = r->filename;
}
if(r->finfo.filetype == APR_REG) {
char buf[34];
char *str;
int usedevino = TRUE;
/* finfo.protection (st_mode) set to zero if no such file */
if(r->finfo.protection == 0) {
usedevino = FALSE;
}
/* Is the device/inode in r->finfo valid? */
if(!(r->finfo.valid & APR_FINFO_IDENT)) {
usedevino = FALSE;
}
/* When possible, hash the body on dev:inode to minimize file
duplication. */
if(usedevino) {
apr_uint64_t device = r->finfo.device; /* Avoid ifdef ... */
apr_uint64_t inode = r->finfo.inode; /* ... type-mess */
apr_snprintf(buf, sizeof(buf), "%016" APR_UINT64_T_HEX_FMT ":%016"
APR_UINT64_T_HEX_FMT, device, inode);
str = buf;
}
else {
str = r->filename;
}
dobj->bodyfile = cache_file(r->pool, conf, NULL, str,
CACHE_BODY_SUFFIX);
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
"File %s was hashed using %s into %s",
r->filename, str, dobj->bodyfile);
}
else {
dobj->bodyfile = cache_file(r->pool, conf, NULL, key,
CACHE_BODY_SUFFIX);
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
"Body of URL %s was hashed into %s",
key, dobj->bodyfile);
}
return OK;
}
static apr_status_t file_read_timeout(apr_file_t *file, char * buf,
apr_size_t len, apr_time_t timeout)
{
apr_size_t left, done;
apr_finfo_t finfo;
apr_status_t rc;
apr_interval_time_t loopdelay=CACHE_LOOP_MINSLEEP;
done = 0;
left = len;
while(1) {
rc = apr_file_read_full(file, buf+done, left, &len);
if (rc == APR_SUCCESS) {
break;
}
done += len;
left -= len;
if(!APR_STATUS_IS_EOF(rc)) {
return rc;
}
rc = apr_file_info_get(&finfo, APR_FINFO_MTIME, file);
if(rc != APR_SUCCESS) {
return rc;
}
if(finfo.mtime < (apr_time_now() - timeout) ) {
return APR_ETIMEDOUT;
}
apr_sleep(loopdelay);
loopdelay <<= 1;
if(loopdelay > CACHE_LOOP_MAXSLEEP) {
loopdelay = CACHE_LOOP_MAXSLEEP;
}
}
return APR_SUCCESS;
}
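/* Usage sketch (hypothetical call site): read a fixed-size header struct
 * that another process may still be writing; we give up only if the file's
 * mtime stops advancing for the full timeout period:
 *
 *   disk_cache_info_t disk_info;
 *   rc = file_read_timeout(dobj->hfd, (char *) &disk_info,
 *                          sizeof(disk_info), conf->updtimeout);
 *   if (rc == APR_ETIMEDOUT) {
 *       ... writer presumed dead, treat the entry as broken ...
 *   }
 */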
static apr_status_t open_header(cache_object_t *obj, disk_cache_object_t *dobj,
request_rec *r, const char *key,
disk_cache_conf *conf)
{
int flags = APR_FOPEN_READ | APR_FOPEN_WRITE | APR_FOPEN_BINARY;
disk_cache_format_t format;
apr_status_t rc;
const char *nkey = key;
disk_cache_info_t disk_info;
/* Open header read/write so it's easy to rewrite it when needed */
rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool);
if (rc != APR_SUCCESS) {
return CACHE_EDECLINED;
}
/* read the format from the cache file */
rc = apr_file_read_full(dobj->hfd, &format, sizeof(format), NULL);
if(APR_STATUS_IS_EOF(rc)) {
return CACHE_ENODATA;
}
else if(rc != APR_SUCCESS) {
return rc;
}
/* Vary-files are written to a tempfile and moved into place, so
they should always be complete */
if (format == VARY_FORMAT_VERSION) {
apr_array_header_t* varray;
apr_time_t expire;
char *p;
rc = apr_file_read_full(dobj->hfd, &expire, sizeof(expire), NULL);
if(rc != APR_SUCCESS) {
return rc;
}
varray = apr_array_make(r->pool, 5, sizeof(char*));
rc = read_array(r, varray, dobj->hfd);
if (rc != APR_SUCCESS) {
ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r,
"Cannot parse vary header file: %s",
dobj->hdrsfile);
return CACHE_EDECLINED;
}
apr_file_close(dobj->hfd);
nkey = regen_key(r->pool, r->headers_in, varray, key);
dobj->prefix = dobj->hdrsfile;
p = strrchr((char *)dobj->prefix, '.');
if(p) {
/* Cut away the suffix */
*p = '\0';
}
dobj->hdrsfile = cache_file(r->pool, conf, dobj->prefix, nkey,
CACHE_HEADER_SUFFIX);
rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool);
if (rc != APR_SUCCESS) {
dobj->hfd = NULL;
return CACHE_EDECLINED;
}
rc = apr_file_read_full(dobj->hfd, &format, sizeof(format), NULL);
if(APR_STATUS_IS_EOF(rc)) {
return CACHE_ENODATA;
}
else if(rc != APR_SUCCESS) {
return rc;
}
}
if(format != DISK_FORMAT_VERSION) {
ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
"File '%s' had a version mismatch. File had "
"version: %d (current is %d). Deleted.", dobj->hdrsfile,
format, DISK_FORMAT_VERSION);
dobj->errcleanflags |= ERRCLEAN_HEADER;
file_cache_errorcleanup(dobj, r);
return CACHE_EDECLINED;
}
obj->key = nkey;
dobj->name = key;
/* read the data from the header file */
rc = apr_file_read_full(dobj->hfd, &disk_info, sizeof(disk_info), NULL);
if(APR_STATUS_IS_EOF(rc)) {
return CACHE_ENODATA;
}
else if(rc != APR_SUCCESS) {
return rc;
}
/* Store it away so we can get it later. */
dobj->disk_info = disk_info;