-
-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
FEAT: rebuild legacy rank and store #45
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package checks | ||
|
||
import "github.com/xray-web/web-check-api/checks/store/legacyrank" | ||
|
||
type DomainRank struct { | ||
Domain string `json:"domain"` | ||
Rank int `json:"rank"` | ||
} | ||
|
||
type LegacyRank struct { | ||
data legacyrank.Getter | ||
} | ||
|
||
func NewLegacyRank(lrg legacyrank.Getter) *LegacyRank { | ||
return &LegacyRank{data: lrg} | ||
} | ||
|
||
func (lr *LegacyRank) LegacyRank(domain string) (*DomainRank, error) { | ||
rank, err := lr.data.GetLegacyRank(domain) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return &DomainRank{ | ||
Domain: domain, | ||
Rank: rank, | ||
}, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package checks | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"github.com/xray-web/web-check-api/checks/store/legacyrank" | ||
) | ||
|
||
func TestLegacyRank(t *testing.T) { | ||
t.Parallel() | ||
|
||
t.Run("get rank", func(t *testing.T) { | ||
t.Parallel() | ||
lr := NewLegacyRank(legacyrank.GetterFunc(func(domain string) (int, error) { | ||
return 1, nil | ||
})) | ||
dr, err := lr.LegacyRank("example.com") | ||
assert.NoError(t, err) | ||
assert.Equal(t, 1, dr.Rank) | ||
assert.Equal(t, "example.com", dr.Domain) | ||
}) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
package legacyrank | ||
|
||
import ( | ||
"archive/zip" | ||
"bytes" | ||
"context" | ||
"encoding/csv" | ||
"errors" | ||
"io" | ||
"log" | ||
"net/http" | ||
"strconv" | ||
"sync" | ||
"time" | ||
) | ||
|
||
var ErrNotFound = errors.New("domain not found") | ||
|
||
type Getter interface { | ||
GetLegacyRank(domain string) (int, error) | ||
} | ||
|
||
type GetterFunc func(domain string) (int, error) | ||
|
||
func (f GetterFunc) GetLegacyRank(domain string) (int, error) { | ||
return f(domain) | ||
} | ||
|
||
type InMemoryStore struct{} | ||
|
||
var once sync.Once | ||
var data map[string]int //map of domain to rank | ||
|
||
func NewInMemoryStore() *InMemoryStore { | ||
return &InMemoryStore{} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why would we return the actual InMemoryStore, instead of a pointer to it? Or would that just be unnecessary here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pointer was as i intended to originally to have the data inside the struct. but then moved it to a package with a global for the one off load, either would work here, the difference is negligible although strictly speaking non pointer would be a tiny bit faster |
||
} | ||
|
||
func (s *InMemoryStore) GetLegacyRank(url string) (int, error) { | ||
once.Do(func() { | ||
var err error | ||
data, err = load() | ||
if err != nil { | ||
log.Println(err) | ||
} | ||
}) | ||
|
||
rank, ok := data[url] | ||
if !ok { | ||
return -1, ErrNotFound | ||
} | ||
return rank, nil | ||
} | ||
|
||
func load() (map[string]int, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This might be able to be split up a bit smaller There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah i dont like the the fact it needs to be loaded into memory first, difference of taking in a Reader and ReaderAt with the zip reader, its possible to stream a zip file. but not with the native APIs There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And maybe around error handling too. As it's beeing logged but looks like it's not returned to the caller. Maybe error handling is something i can think about at a more global level, I'm not sure how that typically works with Go, I'd need to look into it. In the JS world, I'd have a reusable error class/function, which is called whenever there's an error. So that error handling is consistent and observability/monitoring can just be implemented in one place. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Erros in golang are handled when the happen not at global level. |
||
ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) | ||
defer cancel() | ||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip", nil) | ||
if err != nil { | ||
return nil, err | ||
} | ||
client := &http.Client{ | ||
Timeout: time.Second * 10, | ||
} | ||
resp, err := client.Do(req) | ||
if err != nil { | ||
return nil, err | ||
} | ||
defer resp.Body.Close() | ||
b, err := io.ReadAll(resp.Body) | ||
if err != nil { | ||
return nil, err | ||
} | ||
zf, err := zip.NewReader(bytes.NewReader(b), int64(len(b))) | ||
if err != nil { | ||
return nil, err | ||
} | ||
f, err := zf.Open("top-1m.csv") | ||
if err != nil { | ||
return nil, err | ||
} | ||
defer f.Close() | ||
r := csv.NewReader(f) | ||
data := make(map[string]int) | ||
for { | ||
record, err := r.Read() | ||
if err == io.EOF { | ||
break | ||
} | ||
if err != nil { | ||
return nil, err | ||
} | ||
rank, err := strconv.Atoi(record[0]) | ||
if err != nil { | ||
return nil, err | ||
} | ||
data[record[1]] = rank | ||
} | ||
return data, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package legacyrank_test | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"github.com/xray-web/web-check-api/checks/store/legacyrank" | ||
) | ||
|
||
func TestInMemoryStore(t *testing.T) { | ||
t.Parallel() | ||
|
||
t.Run("get google rank", func(t *testing.T) { | ||
t.Parallel() | ||
ims := legacyrank.NewInMemoryStore() | ||
dr, err := ims.GetLegacyRank("google.com") | ||
assert.NoError(t, err, dr) | ||
}) | ||
|
||
t.Run("get microsoft rank", func(t *testing.T) { | ||
t.Parallel() | ||
ims := legacyrank.NewInMemoryStore() | ||
dr, err := ims.GetLegacyRank("microsoft.com") | ||
assert.NoError(t, err, dr) | ||
}) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could this be encapsulated in the
InMemoryStore
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could but the once download would be per instance of memory store instead of once when the module is imported, the data would need to be at the handler level and as this is just a temporary solution, i didnt want to put it at handler level