Skip to content

Commit

Permalink
Cleaning up unused code (#9) (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
stephen-pl authored Dec 19, 2023
1 parent 99bb7f7 commit 49ed9ce
Show file tree
Hide file tree
Showing 11 changed files with 163 additions and 38 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
# Test binary, built with `go test -c`
*.test

**/bafy.*

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

Expand All @@ -34,4 +36,4 @@
*.code-workspace

#sandbox
*.sandbox.*
*.sandbox.*
2 changes: 2 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ run:
- extern
- integration/repdao
- integration/repdao_dp
skip-files:
- pkg/net/host.go # skipped: linting fails because libp2p cannot be resolved on go1.20.1

linters:
enable-all: true
Expand Down
1 change: 1 addition & 0 deletions pkg/env/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const (
Longitude Key = "_LONGITUDE"
LotusAPIToken Key = "LOTUS_API_TOKEN"
LotusAPIUrl Key = "LOTUS_API_URL"
MaxChallengesPerLevel Key = "MAX_CHALLENGES_PER_LEVEL"
ProcessErrorInterval Key = "PROCESS_ERROR_INTERVAL"
ProcessModules Key = "PROCESS_MODULES"
ProviderCacheTTL Key = "PROVIDER_CACHE_TTL"
Expand Down
109 changes: 72 additions & 37 deletions pkg/net/bitswap.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@ package net

import (
"context"
"fmt"
"math"
"math/rand"
"time"

"github.com/data-preservation-programs/RetrievalBot/pkg/env"
"github.com/data-preservation-programs/RetrievalBot/pkg/task"
"github.com/ipfs/go-cid"
"github.com/ipfs/go-datastore"
Expand Down Expand Up @@ -113,6 +116,7 @@ func (c BitswapClient) Retrieve(
parent context.Context,
target peer.AddrInfo,
cid cid.Cid) (*task.RetrievalResult, error) {
fmt.Println(cid)
logger := logging.Logger("bitswap_client").With("cid", cid).With("target", target)
network := bsnet.NewFromIpfsHost(c.host, SingleContentRouter{
AddrInfo: target,
Expand Down Expand Up @@ -173,7 +177,15 @@ func (c BitswapClient) SpadeTraversal(parent context.Context,
startingCid cid.Cid,
maxTraverseDepth uint) (*task.RetrievalResult, error) {
logger := logging.Logger("bitswap_client_spade").With("cid", startingCid).With("target", target)
cidToRetrieve := startingCid

maxChallengesPerLevel := env.GetInt(env.MaxChallengesPerLevel, 0)
if maxChallengesPerLevel <= 0 {
maxChallengesPerLevel = 1
logger.Infof("MaxChallengesPerLevel is <= 0. Using 1 challenge per level")
}

cidsToRetrieve := initializeCidsToRetrieve(maxChallengesPerLevel, startingCid)
nextLayerIndexes := make([]datamodel.Link, 0, int(math.Pow(2, float64(maxTraverseDepth))))

// Initialize hosts and clients required to do all the retrieval tests
network := bsnet.NewFromIpfsHost(c.host, SingleContentRouter{
Expand All @@ -187,60 +199,83 @@ func (c BitswapClient) SpadeTraversal(parent context.Context,
startTime := time.Now()

i := uint(0)
var blk blocks.Block
for {
// Retrieval
logger.Infof("retrieving %s\n", cidToRetrieve.String())
blk, err := c.RetrieveBlock(parent, target, network, bswap, cidToRetrieve)

if err != nil {
return task.NewErrorRetrievalResultWithErrorResolution(task.RetrievalFailure, err), nil
for j, cidToRetrieve := range cidsToRetrieve {
// Retrieval
logger.Infof("retrieving %s\n", cidToRetrieve.String())
blk, err := c.RetrieveBlock(parent, target, network, bswap, cidToRetrieve)

if err != nil {
return task.NewErrorRetrievalResultWithErrorResolution(task.RetrievalFailure, err), nil
}

// Verify returned content hashes to the CID we're expecting
if !blk.Cid().Equals(cidToRetrieve) {
return task.NewErrorRetrievalResult(task.CIDMismatch,
errors.Errorf("retrieved cid does not match requested: %s, %s",
blk.Cid().String(), cidToRetrieve)), nil
}

// Wait until we are at max depth and tried all the challenges
if i == maxTraverseDepth && j == len(cidsToRetrieve)-1 {
var size = int64(len(blk.RawData()))
elapsed := time.Since(startTime)
logger.With("size", size).With("elapsed", elapsed).Info("Retrieved block")

// we've reached the requested depth of the tree
return task.NewSuccessfulRetrievalResult(elapsed, size, elapsed), nil
}

// if not at bottom of the tree, keep going down the links until we reach it or hit a dead end
links, err := FindLinks(parent, blk)
if err != nil {
return task.NewErrorRetrievalResultWithErrorResolution(task.CannotDecodeLinks, err), nil
}

logger.Debugf("cid %s has %d links\n", cidToRetrieve.String(), len(links))

nextLayerIndexes = append(nextLayerIndexes, links...)
}

// Verify returned content hashes to the CID we're expecting
if !blk.Cid().Equals(cidToRetrieve) {
return task.NewErrorRetrievalResult(task.CIDMismatch,
errors.Errorf("retrieved cid does not match requested: %s, %s",
blk.Cid().String(), cidToRetrieve)), nil
}

if i == maxTraverseDepth {
if len(nextLayerIndexes) == 0 {
var size = int64(len(blk.RawData()))
elapsed := time.Since(startTime)
logger.With("size", size).With("elapsed", elapsed).Info("Retrieved block")

// we've reached the requested depth of the tree
return task.NewSuccessfulRetrievalResult(elapsed, size, elapsed), nil
}

// if not at bottom of the tree, keep going down the links until we reach it or hit a dead end
links, err := FindLinks(parent, blk)
if err != nil {
return task.NewErrorRetrievalResultWithErrorResolution(task.CannotDecodeLinks, err), nil
}
// Clear out cids list to prevent resizing array
cidsToRetrieve = cidsToRetrieve[:0]

logger.Debugf("cid %s has %d links\n", cidToRetrieve.String(), len(links))
// Randomize a slice so we can find the next cids to challenge
rand.Shuffle(len(nextLayerIndexes), func(i, j int) {
nextLayerIndexes[i], nextLayerIndexes[j] = nextLayerIndexes[j], nextLayerIndexes[i]
})

if len(links) == 0 {
var size = int64(len(blk.RawData()))
elapsed := time.Since(startTime)
logger.With("size", size).With("elapsed", elapsed).Info("Retrieved block")

return task.NewSuccessfulRetrievalResult(elapsed, size, elapsed), nil
for j := 0; j < int(math.Min(float64(maxChallengesPerLevel), float64(len(nextLayerIndexes)))); j++ {
cid, err := cid.Parse(nextLayerIndexes[j].String())
if err != nil {
return task.NewErrorRetrievalResultWithErrorResolution(task.CIDCodecNotSupported, err), nil
}
cidsToRetrieve = append(cidsToRetrieve, cid)
}

// randomly pick a link to go down
//nolint:all we don't need crypto secured random numbers
nextIndex := rand.Intn(len(links))
// Clear out nextLayerIndexes list to help prevent resizing array
nextLayerIndexes = nextLayerIndexes[:0]

cidToRetrieve, err = cid.Parse(links[nextIndex].String())
if err != nil {
return task.NewErrorRetrievalResultWithErrorResolution(task.CIDCodecNotSupported, err), nil
}

i++ // To the next layer of the tree
// To the next layer of the tree
i++
}
}

// initializeCidsToRetrieve builds the challenge list for the first level of a
// traversal: a slice holding only startingCid, with capacity reserved for
// maxChallengesPerLevel entries so the caller can truncate and refill it for
// later levels without reallocating.
//
// A maxChallengesPerLevel below 1 is treated as 1: the starting CID always
// occupies one slot, and passing a negative capacity to make would panic.
func initializeCidsToRetrieve(maxChallengesPerLevel int, startingCid cid.Cid) []cid.Cid {
	capacity := maxChallengesPerLevel
	if capacity < 1 {
		capacity = 1
	}

	cidsToRetrieve := make([]cid.Cid, 0, capacity)
	return append(cidsToRetrieve, startingCid)
}

// Returns the raw block data, the links, and error if any
// Takes in `network` and `bswap` client, so that it can be used in a loop for traversals / multiple retrievals
func (c BitswapClient) RetrieveBlock(
Expand Down
25 changes: 25 additions & 0 deletions pkg/net/bitswap_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package net

import (
"testing"

"github.com/data-preservation-programs/RetrievalBot/pkg/env"
"github.com/ipfs/go-cid"
"github.com/stretchr/testify/assert"
)

func TestInitializeCidsToRetrieve(t *testing.T) {
challenge4 := initializeCidsToRetrieve(4, cid.Cid{})

assert.Equal(t, 1, len(challenge4))
}

// TestValidateGetMaxChallengesPerLevel checks that env.GetInt returns the value
// of MAX_CHALLENGES_PER_LEVEL from the environment.
func TestValidateGetMaxChallengesPerLevel(t *testing.T) {
	// The fallback deliberately differs from every expected value; otherwise
	// the assertions would still pass if the environment variable were ignored
	// and the default returned instead.
	const fallback = -1

	t.Setenv("MAX_CHALLENGES_PER_LEVEL", "5")
	challenges := env.GetInt(env.MaxChallengesPerLevel, fallback)
	assert.Equal(t, 5, challenges)

	t.Setenv("MAX_CHALLENGES_PER_LEVEL", "50")
	challenges = env.GetInt(env.MaxChallengesPerLevel, fallback)
	assert.Equal(t, 50, challenges)
}
1 change: 1 addition & 0 deletions pkg/net/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package net

import (
"context"

"github.com/libp2p/go-libp2p"
"github.com/libp2p/go-libp2p/core/host"
"github.com/libp2p/go-libp2p/core/network"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{"RecordType":"DagAggregatePreamble","Version":1}
{"RecordType":"DagAggregateSummary","EntryCount":4,"EntriesSortedBy":"DagCidV1","Description":"Aggregate of non-related DAGs, produced by github.com/filecoin-project/go-dagaggregator-unixfs"}
{"RecordType":"DagAggregateEntry","DagCidV1":"bafkreialad2qaplrgjs2x2rs4fycwmwmpmocantoho3doxulmbmlrg6qea","PathPrefixes":["baf...ea","baf...6qea"],"PathIndexes":[1,0,0]}
{"RecordType":"DagAggregateEntry","DagCidV1":"bafkreiarcpog7fgb3cvs4iznh6jcqtxgyyk5rbsmk4dvxuty5tylof6qea","PathPrefixes":["baf...ea","baf...6qea"],"PathIndexes":[1,0,1]}
{"RecordType":"DagAggregateEntry","DagCidV1":"bafkreihjji2ny4zwyh7ubc3bmdb5tj455vi5fhsbwf2uvcw6l75z446qea","PathPrefixes":["baf...ea","baf...6qea"],"PathIndexes":[1,0,2]}
{"RecordType":"DagAggregateEntry","DagCidV1":"bafybeibhbx3y6tnn7q4gpsous6apnobft5jybvroiepdsmvps2lmycjjxu","DagCidV0":"QmQy6xmJhrcC5QLboAcGFcAE1tC8CrwDVkrHdEYJkLscrQ","DagSize":42,"NodeCount":1,"PathPrefixes":["baf...xu","baf...jjxu"],"PathIndexes":[2,0,0]}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@

IPFS -- Inter-Planetary File system

IPFS is a global, versioned, peer-to-peer filesystem. It combines good ideas
from Git, BitTorrent, Kademlia, SFS, and the Web. It is like a single bit-
torrent swarm, exchanging git objects. IPFS provides an interface as simple
as the HTTP web, but with permanence built-in. You can also mount the world
at /ipfs.

IPFS is a protocol:
- defines a content-addressed file system
- coordinates content delivery
- combines Kademlia + BitTorrent + Git

IPFS is a filesystem:
- has directories and files
- mountable filesystem (via FUSE)

IPFS is a web:
- can be used to view documents like the web
- files accessible via HTTP at `http://ipfs.io/<path>`
- browsers or extensions can learn to use `ipfs://` directly
- hash-addressed content guarantees the authenticity

IPFS is modular:
- connection layer over any network protocol
- routing layer
- uses a routing layer DHT (kademlia/coral)
- uses a path-based naming service
- uses BitTorrent-inspired block exchange

IPFS uses crypto:
- cryptographic-hash content addressing
- block-level deduplication
- file integrity + versioning
- filesystem-level encryption + signing support

IPFS is p2p:
- worldwide peer-to-peer file transfers
- completely decentralized architecture
- **no** central point of failure

IPFS is a CDN:
- add a file to the filesystem locally, and it's now available to the world
- caching-friendly (content-hash naming)
- BitTorrent-based bandwidth distribution

IPFS has a name service:
- IPNS, an SFS-inspired name system
- global namespace based on PKI
- serves to build trust chains
- compatible with other NSes
- can map DNS, .onion, .bit, etc to IPNS

0 comments on commit 49ed9ce

Please sign in to comment.