@@ -32,7 +32,13 @@ var ErrNotFound = fmt.Errorf("not found")
32
32
// on extremely large (likely impractical) HAMTs that are unable to be
33
33
// represented with the hash function used. Hash functions with larger byte
34
34
// output increase the maximum theoretical depth of a HAMT.
35
- var ErrMaxDepth = fmt .Errorf ("attempted to traverse hamt beyond max depth" )
35
+ var ErrMaxDepth = fmt .Errorf ("attempted to traverse HAMT beyond max-depth" )
36
+
37
+ // ErrMalformedHamt is returned whenever a block intended as a HAMT node does
38
+ // not conform to the expected form that a block may take. This can occur
39
+ // during block-load where initial validation takes place or during traversal
40
+ // where certain conditions are expected to be met.
41
+ var ErrMalformedHamt = fmt .Errorf ("HAMT node was malformed" )
36
42
37
43
//-----------------------------------------------------------------------------
38
44
// Serialized data structures
@@ -302,15 +308,10 @@ func (p *Pointer) loadChild(ctx context.Context, ns cbor.IpldStore, bitWidth int
302
308
return p .cache , nil
303
309
}
304
310
305
- out , err := LoadNode (ctx , ns , p .Link )
311
+ out , err := loadNode (ctx , ns , p .Link , false , bitWidth , hash )
306
312
if err != nil {
307
313
return nil , err
308
314
}
309
- // these don't get set in LoadNode because we don't have the Options
310
- // at this point but what is inherited from the parents may have varied
311
- // from the defaults
312
- out .bitWidth = bitWidth
313
- out .hash = hash
314
315
315
316
p .cache = out
316
317
return out , nil
@@ -328,33 +329,79 @@ func (p *Pointer) loadChild(ctx context.Context, ns cbor.IpldStore, bitWidth int
328
329
// of this library to remain the defaults long-term and have strategies in
329
330
// place to manage variations.
330
331
func LoadNode (ctx context.Context , cs cbor.IpldStore , c cid.Cid , options ... Option ) (* Node , error ) {
331
- // TODO(rvagg): loaded nodes must be validated to make sure we have only
332
- // the correct form of Nodes to avoid attacks from alternative implementations
333
- // that feed us poorly formed data. Check that:
334
- // 1. Pointers contains the correct number of elements defined by Bitfield
335
- // 2. Pointers contain *only* a link or a bucket (this may already be done in
336
- // the CBOR unmarshal but might be worth doing here so the check is all in
337
- // one place)
338
- // 3. Pointers with links have are DAG-CBOR multicodec
339
- // 4. KV buckets contain strictly between 1 and bucketSize elements
340
- // 5. KV buckets are ordered by key (bytewise comparison)
341
- // 6. keys and values are valid (what are the rules? len(key)>0? can val be
342
- // nul? etc.)
343
- // 7. .. potentially we could validate the position of elements if we propagate
344
- // the depth of this node so we know which bits to chomp off the hash digest.
332
+ return loadNode (ctx , cs , c , true , defaultBitWidth , defaultHashFunction , options ... )
333
+ }
334
+
335
+ // internal version of loadNode that is aware of whether this is a root node or
336
+ // not for the purpose of additional validation on non-root nodes.
337
+ func loadNode (
338
+ ctx context.Context ,
339
+ cs cbor.IpldStore ,
340
+ c cid.Cid ,
341
+ isRoot bool ,
342
+ bitWidth int ,
343
+ hashFunction func ([]byte ) []byte ,
344
+ options ... Option ,
345
+ ) (* Node , error ) {
346
+
345
347
var out Node
346
348
if err := cs .Get (ctx , c , & out ); err != nil {
347
349
return nil , err
348
350
}
349
351
350
352
out .store = cs
351
- out .bitWidth = defaultBitWidth
352
- out .hash = defaultHashFunction
353
+ out .bitWidth = bitWidth
354
+ out .hash = hashFunction
353
355
// apply functional options to node before using
354
356
for _ , option := range options {
355
357
option (& out )
356
358
}
357
359
360
+ // Validation
361
+
362
+ // too many elements in the data array for the configured bitWidth?
363
+ if len (out .Pointers ) > 1 << uint (out .bitWidth ) {
364
+ return nil , ErrMalformedHamt
365
+ }
366
+
367
+ // the bifield is lying or the elements array is
368
+ if out .bitsSetCount () != len (out .Pointers ) {
369
+ return nil , ErrMalformedHamt
370
+ }
371
+
372
+ for _ , ch := range out .Pointers {
373
+ isLink := ch .isShard ()
374
+ isBucket := ch .KVs != nil
375
+ if ! ((isLink && ! isBucket ) || (! isLink && isBucket )) {
376
+ return nil , ErrMalformedHamt
377
+ }
378
+ if isLink && ch .Link .Type () != cid .DagCBOR { // not dag-cbor
379
+ return nil , ErrMalformedHamt
380
+ }
381
+ if isBucket {
382
+ if len (ch .KVs ) == 0 || len (ch .KVs ) > bucketSize {
383
+ return nil , ErrMalformedHamt
384
+ }
385
+ for i := 1 ; i < len (ch .KVs ); i ++ {
386
+ if bytes .Compare (ch .KVs [i - 1 ].Key , ch .KVs [i ].Key ) >= 0 {
387
+ return nil , ErrMalformedHamt
388
+ }
389
+ }
390
+ }
391
+ }
392
+
393
+ if ! isRoot {
394
+ // the only valid empty node is a root node
395
+ if len (out .Pointers ) == 0 {
396
+ return nil , ErrMalformedHamt
397
+ }
398
+ // a non-root node that contains <=bucketSize direct elements should not
399
+ // exist under compaction rules
400
+ if out .directChildCount () == 0 && out .directKVCount () <= bucketSize {
401
+ return nil , ErrMalformedHamt
402
+ }
403
+ }
404
+
358
405
return & out , nil
359
406
}
360
407
@@ -488,12 +535,7 @@ func (n *Node) cleanChild(chnd *Node, cindex byte) error {
488
535
return nil
489
536
}
490
537
491
- l := len (chnd .Pointers )
492
- if l == 0 {
493
- return fmt .Errorf ("incorrectly formed HAMT" )
494
- }
495
-
496
- if l == 1 {
538
+ if len (chnd .Pointers ) == 1 {
497
539
// The case where the child node has a single bucket, which we know can
498
540
// only contain `bucketSize` elements (maximum), so we need to pull that
499
541
// bucket up into this node.
0 commit comments