Skip to content

Commit 1b0d1f9

Browse files
authored
Merge pull request #59 from rvagg/rvagg/load-validation
Load-time block format validation
2 parents be2d564 + 570d059 commit 1b0d1f9

File tree

5 files changed

+418
-29
lines changed

5 files changed

+418
-29
lines changed

Diff for: Makefile

+5
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ test:
88
go test ./...
99
.PHONY: test
1010

11+
coverage:
12+
go test -coverprofile=coverage.out ./...
13+
go tool cover -html=coverage.out
14+
.PHONY: coverage
15+
1116
benchmark:
1217
go test -bench=./...
1318
.PHONY: benchmark

Diff for: hamt.go

+71-29
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,13 @@ var ErrNotFound = fmt.Errorf("not found")
3232
// on extremely large (likely impractical) HAMTs that are unable to be
3333
// represented with the hash function used. Hash functions with larger byte
3434
// output increase the maximum theoretical depth of a HAMT.
35-
var ErrMaxDepth = fmt.Errorf("attempted to traverse hamt beyond max depth")
35+
var ErrMaxDepth = fmt.Errorf("attempted to traverse HAMT beyond max-depth")
36+
37+
// ErrMalformedHamt is returned whenever a block intended as a HAMT node does
38+
// not conform to the expected form that a block may take. This can occur
39+
// during block-load where initial validation takes place or during traversal
40+
// where certain conditions are expected to be met.
41+
var ErrMalformedHamt = fmt.Errorf("HAMT node was malformed")
3642

3743
//-----------------------------------------------------------------------------
3844
// Serialized data structures
@@ -302,15 +308,10 @@ func (p *Pointer) loadChild(ctx context.Context, ns cbor.IpldStore, bitWidth int
302308
return p.cache, nil
303309
}
304310

305-
out, err := LoadNode(ctx, ns, p.Link)
311+
out, err := loadNode(ctx, ns, p.Link, false, bitWidth, hash)
306312
if err != nil {
307313
return nil, err
308314
}
309-
// these don't get set in LoadNode because we don't have the Options
310-
// at this point but what is inherited from the parents may have varied
311-
// from the defaults
312-
out.bitWidth = bitWidth
313-
out.hash = hash
314315

315316
p.cache = out
316317
return out, nil
@@ -328,33 +329,79 @@ func (p *Pointer) loadChild(ctx context.Context, ns cbor.IpldStore, bitWidth int
328329
// of this library to remain the defaults long-term and have strategies in
329330
// place to manage variations.
330331
func LoadNode(ctx context.Context, cs cbor.IpldStore, c cid.Cid, options ...Option) (*Node, error) {
331-
// TODO(rvagg): loaded nodes must be validated to make sure we have only
332-
// the correct form of Nodes to avoid attacks from alternative implementations
333-
// that feed us poorly formed data. Check that:
334-
// 1. Pointers contains the correct number of elements defined by Bitfield
335-
// 2. Pointers contain *only* a link or a bucket (this may already be done in
336-
// the CBOR unmarshal but might be worth doing here so the check is all in
337-
// one place)
338-
// 3. Pointers with links have are DAG-CBOR multicodec
339-
// 4. KV buckets contain strictly between 1 and bucketSize elements
340-
// 5. KV buckets are ordered by key (bytewise comparison)
341-
// 6. keys and values are valid (what are the rules? len(key)>0? can val be
342-
// nul? etc.)
343-
// 7. .. potentially we could validate the position of elements if we propagate
344-
// the depth of this node so we know which bits to chomp off the hash digest.
332+
return loadNode(ctx, cs, c, true, defaultBitWidth, defaultHashFunction, options...)
333+
}
334+
335+
// internal version of loadNode that is aware of whether this is a root node or
336+
// not for the purpose of additional validation on non-root nodes.
337+
func loadNode(
338+
ctx context.Context,
339+
cs cbor.IpldStore,
340+
c cid.Cid,
341+
isRoot bool,
342+
bitWidth int,
343+
hashFunction func([]byte) []byte,
344+
options ...Option,
345+
) (*Node, error) {
346+
345347
var out Node
346348
if err := cs.Get(ctx, c, &out); err != nil {
347349
return nil, err
348350
}
349351

350352
out.store = cs
351-
out.bitWidth = defaultBitWidth
352-
out.hash = defaultHashFunction
353+
out.bitWidth = bitWidth
354+
out.hash = hashFunction
353355
// apply functional options to node before using
354356
for _, option := range options {
355357
option(&out)
356358
}
357359

360+
// Validation
361+
362+
// too many elements in the data array for the configured bitWidth?
363+
if len(out.Pointers) > 1<<uint(out.bitWidth) {
364+
return nil, ErrMalformedHamt
365+
}
366+
367+
// the bifield is lying or the elements array is
368+
if out.bitsSetCount() != len(out.Pointers) {
369+
return nil, ErrMalformedHamt
370+
}
371+
372+
for _, ch := range out.Pointers {
373+
isLink := ch.isShard()
374+
isBucket := ch.KVs != nil
375+
if !((isLink && !isBucket) || (!isLink && isBucket)) {
376+
return nil, ErrMalformedHamt
377+
}
378+
if isLink && ch.Link.Type() != cid.DagCBOR { // not dag-cbor
379+
return nil, ErrMalformedHamt
380+
}
381+
if isBucket {
382+
if len(ch.KVs) == 0 || len(ch.KVs) > bucketSize {
383+
return nil, ErrMalformedHamt
384+
}
385+
for i := 1; i < len(ch.KVs); i++ {
386+
if bytes.Compare(ch.KVs[i-1].Key, ch.KVs[i].Key) >= 0 {
387+
return nil, ErrMalformedHamt
388+
}
389+
}
390+
}
391+
}
392+
393+
if !isRoot {
394+
// the only valid empty node is a root node
395+
if len(out.Pointers) == 0 {
396+
return nil, ErrMalformedHamt
397+
}
398+
// a non-root node that contains <=bucketSize direct elements should not
399+
// exist under compaction rules
400+
if out.directChildCount() == 0 && out.directKVCount() <= bucketSize {
401+
return nil, ErrMalformedHamt
402+
}
403+
}
404+
358405
return &out, nil
359406
}
360407

@@ -488,12 +535,7 @@ func (n *Node) cleanChild(chnd *Node, cindex byte) error {
488535
return nil
489536
}
490537

491-
l := len(chnd.Pointers)
492-
if l == 0 {
493-
return fmt.Errorf("incorrectly formed HAMT")
494-
}
495-
496-
if l == 1 {
538+
if len(chnd.Pointers) == 1 {
497539
// The case where the child node has a single bucket, which we know can
498540
// only contain `bucketSize` elements (maximum), so we need to pull that
499541
// bucket up into this node.

0 commit comments

Comments
 (0)