-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
process archive in mememory #75
Merged
+80
−184
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,157 +2,88 @@ package k6deps | |
|
||
import ( | ||
"archive/tar" | ||
"bytes" | ||
"encoding/json" | ||
"errors" | ||
"io" | ||
"os" | ||
"path/filepath" | ||
"strings" | ||
|
||
"github.com/grafana/k6pack" | ||
"slices" | ||
) | ||
|
||
//nolint:forbidigo | ||
func loadMetadata(dir string, opts *Options) error { | ||
var meta archiveMetadata | ||
|
||
data, err := os.ReadFile(filepath.Join(filepath.Clean(dir), "metadata.json")) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if err = json.Unmarshal(data, &meta); err != nil { | ||
return err | ||
} | ||
|
||
opts.Manifest.Ignore = true // no manifest (yet) in archive | ||
|
||
opts.Script.Name = filepath.Join( | ||
dir, | ||
"file", | ||
filepath.FromSlash(strings.TrimPrefix(meta.Filename, "file:///")), | ||
) | ||
|
||
if value, found := meta.Env[EnvDependencies]; found { | ||
opts.Env.Name = EnvDependencies | ||
opts.Env.Contents = []byte(value) | ||
} else { | ||
opts.Env.Ignore = true | ||
} | ||
|
||
contents, err := os.ReadFile(opts.Script.Name) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
script, _, err := k6pack.Pack(string(contents), &k6pack.Options{Filename: opts.Script.Name}) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
opts.Script.Contents = script | ||
|
||
return nil | ||
} | ||
|
||
type archiveMetadata struct { | ||
Filename string `json:"filename"` | ||
Env map[string]string `json:"env"` | ||
} | ||
|
||
const maxFileSize = 1024 * 1024 * 10 // 10M | ||
|
||
//nolint:forbidigo | ||
func extractArchive(dir string, input io.Reader) error { | ||
func processArchive(input io.Reader) (analyzer, error) { | ||
reader := tar.NewReader(input) | ||
|
||
analyzers := make([]analyzer, 0) | ||
|
||
for { | ||
header, err := reader.Next() | ||
|
||
switch { | ||
case err == io.EOF: | ||
return nil | ||
case errors.Is(err, io.EOF): | ||
return mergeAnalyzers(analyzers...), nil | ||
case err != nil: | ||
return err | ||
return nil, err | ||
case header == nil: | ||
continue | ||
} | ||
|
||
target := filepath.Join(dir, filepath.Clean(filepath.FromSlash(header.Name))) | ||
|
||
switch header.Typeflag { | ||
case tar.TypeDir: | ||
if err := os.MkdirAll(target, 0o750); err != nil { | ||
return err | ||
} | ||
if header.Typeflag != tar.TypeReg || !shouldProcess(header.Name) { | ||
continue | ||
} | ||
|
||
case tar.TypeReg: | ||
if shouldSkip(target) { | ||
continue | ||
} | ||
content := &bytes.Buffer{} | ||
if _, err := io.CopyN(content, reader, maxFileSize); err != nil && !errors.Is(err, io.EOF) { | ||
return nil, err | ||
} | ||
|
||
file, err := os.OpenFile(filepath.Clean(target), os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode)) //nolint:gosec | ||
// if the file is metadata.json, we extract the dependencies from the env | ||
if header.Name == "metadata.json" { | ||
analyzer, err := analizeMetadata(content.Bytes()) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if _, err := io.CopyN(file, reader, maxFileSize); err != nil && !errors.Is(err, io.EOF) { | ||
return err | ||
} | ||
|
||
if err = file.Close(); err != nil { | ||
return err | ||
} | ||
|
||
// if it is a link or symlink, we copy the content of the linked file to the target | ||
// we assume the linked file was already processed and exists in the directory. | ||
case tar.TypeLink, tar.TypeSymlink: | ||
if shouldSkip(target) { | ||
continue | ||
return nil, err | ||
} | ||
analyzers = append(analyzers, analyzer) | ||
continue | ||
} | ||
|
||
linkedFile := filepath.Join(dir, filepath.Clean(filepath.FromSlash(header.Linkname))) | ||
if err := followLink(linkedFile, target); err != nil { | ||
return err | ||
} | ||
// analize the file content as an script | ||
target := filepath.Clean(filepath.FromSlash(header.Name)) | ||
src := Source{ | ||
Name: target, | ||
Contents: content.Bytes(), | ||
} | ||
|
||
analyzers = append(analyzers, scriptAnalyzer(src)) | ||
} | ||
} | ||
|
||
// indicates if the file should be skipped during extraction | ||
// we skip csv files and .json except metadata.json | ||
func shouldSkip(target string) bool { | ||
// indicates if the file should be processed during extraction | ||
func shouldProcess(target string) bool { | ||
ext := filepath.Ext(target) | ||
return ext == ".csv" || (ext == ".json" && filepath.Base(target) != "metadata.json") | ||
return slices.Contains([]string{".js", ".ts"}, ext) || slices.Contains([]string{"metadata.json", "data"}, target) | ||
} | ||
|
||
//nolint:forbidigo | ||
func followLink(linkedFile string, target string) error { | ||
source, err := os.Open(filepath.Clean(linkedFile)) | ||
if err != nil { | ||
return err | ||
} | ||
defer source.Close() //nolint:errcheck | ||
|
||
// we need to get the lined file info to create the target file with the same permissions | ||
info, err := source.Stat() | ||
if err != nil { | ||
return err | ||
} | ||
|
||
file, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY, info.Mode()) //nolint:gosec | ||
if err != nil { | ||
return err | ||
// analizeMetadata extracts the dependencies from the metadata.json file | ||
func analizeMetadata(content []byte) (analyzer, error) { | ||
metadata := archiveMetadata{} | ||
if err := json.Unmarshal(content, &metadata); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I'm not mistaken, we could use |
||
return nil, err | ||
} | ||
|
||
_, err = io.Copy(file, source) | ||
if err != nil { | ||
return err | ||
if value, found := metadata.Env[EnvDependencies]; found { | ||
src := Source{ | ||
Name: EnvDependencies, | ||
Contents: []byte(value), | ||
} | ||
return envAnalyzer(src), nil | ||
} | ||
|
||
err = file.Close() | ||
if err != nil { | ||
return err | ||
} | ||
return nil | ||
return empty, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's a corner case, but still, I'm thinking isn't that right for the files that are bigger than 10 MB 🤔 Shoudld we maybe write a warning log in that case.
If the other suggestion about
analizeMetadata
will be accepted, we could probably move these lines closer to thescriptAnalyzer
And perhaps not for that PR, but it's worth investigation if we can instead of copying
content.Bytes()
just pass around readers, in other words make possible toscriptAnalyzer
also work with buffersThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure what you refer to in "isn't that right". Could you please elaborate?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So my understanding that this line will copy at maximum 10MB, silently ignoring the rest, and I'm questioning if that's a right way to do. Yes, the risks that we processing bigger files where module usage located after 10MB of data is low, but still
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I definitely will do this. I started but realized it requires significant changes in other parts of the code and prefer to make this in a follow-up PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, like I said, both my comments are non-blocking, feel free to merge this as it is and continue in follow-up PRs 👍