process archive in memory #75

Merged: 1 commit merged on Mar 12, 2025
31 changes: 30 additions & 1 deletion analyze.go
@@ -1,12 +1,19 @@
package k6deps

import "encoding/json"
import (
"encoding/json"
"os"
)

// Analyze searches, loads and analyzes the specified sources,
// extracting the k6 extensions and their version constraints.
// Note: if archive is specified, the other three sources will not be taken into account,
// since the archive may contain them.
func Analyze(opts *Options) (Dependencies, error) {
if !opts.Archive.IsEmpty() {
return archiveAnalizer(opts.Archive)()
}

if err := loadSources(opts); err != nil {
return nil, err
}
@@ -86,6 +93,28 @@ func envAnalyzer(src Source) analyzer {
}
}

func archiveAnalizer(src Source) analyzer {
return func() (Dependencies, error) {
input := src.Reader
if input == nil {
tar, err := os.Open(src.Name) //nolint:forbidigo
if err != nil {
return nil, err
}
defer tar.Close() //nolint:errcheck

input = tar
}

analyzer, err := processArchive(input)
if err != nil {
return nil, err
}

return analyzer()
}
}

func mergeAnalyzers(sources ...analyzer) analyzer {
return func() (Dependencies, error) {
deps := make(Dependencies)
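For context, a minimal usage sketch of the new archive-first behavior described in the Analyze doc comment above (not part of this diff; the archive path is hypothetical):

package main

import (
	"fmt"
	"log"

	"github.com/grafana/k6deps"
)

func main() {
	// With an Archive source set, Analyze inspects the archive contents
	// directly and ignores the script, manifest and env sources.
	deps, err := k6deps.Analyze(&k6deps.Options{
		Archive: k6deps.Source{Name: "archive.tar"}, // hypothetical archive path
	})
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(deps.String())
}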
157 changes: 44 additions & 113 deletions archive.go
@@ -2,157 +2,88 @@ package k6deps

import (
"archive/tar"
"bytes"
"encoding/json"
"errors"
"io"
"os"
"path/filepath"
"strings"

"github.com/grafana/k6pack"
"slices"
)

//nolint:forbidigo
func loadMetadata(dir string, opts *Options) error {
var meta archiveMetadata

data, err := os.ReadFile(filepath.Join(filepath.Clean(dir), "metadata.json"))
if err != nil {
return err
}

if err = json.Unmarshal(data, &meta); err != nil {
return err
}

opts.Manifest.Ignore = true // no manifest (yet) in archive

opts.Script.Name = filepath.Join(
dir,
"file",
filepath.FromSlash(strings.TrimPrefix(meta.Filename, "file:///")),
)

if value, found := meta.Env[EnvDependencies]; found {
opts.Env.Name = EnvDependencies
opts.Env.Contents = []byte(value)
} else {
opts.Env.Ignore = true
}

contents, err := os.ReadFile(opts.Script.Name)
if err != nil {
return err
}

script, _, err := k6pack.Pack(string(contents), &k6pack.Options{Filename: opts.Script.Name})
if err != nil {
return err
}

opts.Script.Contents = script

return nil
}

type archiveMetadata struct {
Filename string `json:"filename"`
Env map[string]string `json:"env"`
}

const maxFileSize = 1024 * 1024 * 10 // 10M

//nolint:forbidigo
func extractArchive(dir string, input io.Reader) error {
func processArchive(input io.Reader) (analyzer, error) {
reader := tar.NewReader(input)

analyzers := make([]analyzer, 0)

for {
header, err := reader.Next()

switch {
case err == io.EOF:
return nil
case errors.Is(err, io.EOF):
return mergeAnalyzers(analyzers...), nil
case err != nil:
return err
return nil, err
case header == nil:
continue
}

target := filepath.Join(dir, filepath.Clean(filepath.FromSlash(header.Name)))

switch header.Typeflag {
case tar.TypeDir:
if err := os.MkdirAll(target, 0o750); err != nil {
return err
}
if header.Typeflag != tar.TypeReg || !shouldProcess(header.Name) {
continue
}

case tar.TypeReg:
if shouldSkip(target) {
continue
}
content := &bytes.Buffer{}
if _, err := io.CopyN(content, reader, maxFileSize); err != nil && !errors.Is(err, io.EOF) {
Contributor (review comment on the io.CopyN call):

It's a corner case, but I'm wondering whether this is right for files bigger than 10 MB 🤔 Should we maybe write a warning log in that case?

If the other suggestion about analizeMetadata is accepted, we could probably move these lines closer to scriptAnalyzer.

And perhaps not for this PR, but it's worth investigating whether, instead of copying content.Bytes(), we could just pass readers around; in other words, make it possible for scriptAnalyzer to also work with buffers.

Contributor Author:

Not sure what you refer to by "isn't that right". Could you please elaborate?

Contributor:

My understanding is that this line will copy at most 10 MB, silently ignoring the rest, and I'm questioning whether that's the right approach. Yes, the risk of processing bigger files where the module usage appears after the first 10 MB of data is low, but still.

Contributor Author (on passing readers instead of copying content.Bytes()):

I definitely will do this. I started, but realized it requires significant changes in other parts of the code, so I prefer to do it in a follow-up PR.

Contributor (@olegbespalov, Mar 12, 2025):

Sure, like I said, both of my comments are non-blocking; feel free to merge this as it is and continue in follow-up PRs 👍
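As a hedged illustration of the truncation concern above (not part of this PR; the helper name, placement, and warning behavior are assumptions), processArchive could detect that a tar entry exceeds maxFileSize and surface a warning instead of silently dropping the remainder:

package k6deps

import (
	"bytes"
	"errors"
	"io"
)

// copyLimited reads at most limit bytes from r and reports whether r still
// had more data, i.e. whether the entry would be truncated by the limit.
func copyLimited(r io.Reader, limit int64) ([]byte, bool, error) {
	buf := &bytes.Buffer{}

	if _, err := io.CopyN(buf, r, limit); err != nil {
		if errors.Is(err, io.EOF) {
			return buf.Bytes(), false, nil // the whole entry fit under the limit
		}

		return nil, false, err
	}

	// Exactly limit bytes were copied; probe for one more byte to see
	// whether anything was left unread.
	var probe [1]byte
	if n, _ := r.Read(probe[:]); n > 0 {
		return buf.Bytes(), true, nil
	}

	return buf.Bytes(), false, nil
}

processArchive could then log a warning whenever the second return value is true, while still analyzing the first maxFileSize bytes as it does today.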

return nil, err
}

file, err := os.OpenFile(filepath.Clean(target), os.O_CREATE|os.O_RDWR, os.FileMode(header.Mode)) //nolint:gosec
// if the file is metadata.json, we extract the dependencies from the env
if header.Name == "metadata.json" {
analyzer, err := analizeMetadata(content.Bytes())
if err != nil {
return err
}

if _, err := io.CopyN(file, reader, maxFileSize); err != nil && !errors.Is(err, io.EOF) {
return err
}

if err = file.Close(); err != nil {
return err
}

// if it is a link or symlink, we copy the content of the linked file to the target
// we assume the linked file was already processed and exists in the directory.
case tar.TypeLink, tar.TypeSymlink:
if shouldSkip(target) {
continue
return nil, err
}
analyzers = append(analyzers, analyzer)
continue
}

linkedFile := filepath.Join(dir, filepath.Clean(filepath.FromSlash(header.Linkname)))
if err := followLink(linkedFile, target); err != nil {
return err
}
// analize the file content as an script
target := filepath.Clean(filepath.FromSlash(header.Name))
src := Source{
Name: target,
Contents: content.Bytes(),
}

analyzers = append(analyzers, scriptAnalyzer(src))
}
}

// indicates if the file should be skipped during extraction
// we skip csv files and .json except metadata.json
func shouldSkip(target string) bool {
// indicates if the file should be processed during extraction
func shouldProcess(target string) bool {
ext := filepath.Ext(target)
return ext == ".csv" || (ext == ".json" && filepath.Base(target) != "metadata.json")
return slices.Contains([]string{".js", ".ts"}, ext) || slices.Contains([]string{"metadata.json", "data"}, target)
}

//nolint:forbidigo
func followLink(linkedFile string, target string) error {
source, err := os.Open(filepath.Clean(linkedFile))
if err != nil {
return err
}
defer source.Close() //nolint:errcheck

// we need to get the lined file info to create the target file with the same permissions
info, err := source.Stat()
if err != nil {
return err
}

file, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY, info.Mode()) //nolint:gosec
if err != nil {
return err
// analizeMetadata extracts the dependencies from the metadata.json file
func analizeMetadata(content []byte) (analyzer, error) {
metadata := archiveMetadata{}
if err := json.Unmarshal(content, &metadata); err != nil {
Contributor (review comment on the json.Unmarshal call):

If I'm not mistaken, we could use json.NewDecoder here; then there would be no need to pass bytes around, and this function could work with an io.Reader directly.
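As a minimal sketch of this suggestion (an assumed variant, not the code merged in this PR; it reuses the package's existing archiveMetadata, EnvDependencies, envAnalyzer and empty helpers), analizeMetadata could accept an io.Reader and decode the metadata as a stream:

// Hypothetical stream-based variant of analizeMetadata.
func analizeMetadataFromReader(r io.Reader) (analyzer, error) {
	var metadata archiveMetadata

	if err := json.NewDecoder(r).Decode(&metadata); err != nil {
		return nil, err
	}

	if value, found := metadata.Env[EnvDependencies]; found {
		return envAnalyzer(Source{
			Name:     EnvDependencies,
			Contents: []byte(value),
		}), nil
	}

	return empty, nil
}

processArchive would then call it with a bounded reader over the tar entry, for example analizeMetadataFromReader(io.LimitReader(reader, maxFileSize)).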

return nil, err
}

_, err = io.Copy(file, source)
if err != nil {
return err
if value, found := metadata.Env[EnvDependencies]; found {
src := Source{
Name: EnvDependencies,
Contents: []byte(value),
}
return envAnalyzer(src), nil
}

err = file.Close()
if err != nil {
return err
}
return nil
return empty, nil
}
7 changes: 3 additions & 4 deletions archive_test.go
@@ -43,11 +43,10 @@ func Test_analyzeArchive_Reader(t *testing.T) {
Archive: Source{Reader: file},
}

actual, err := Analyze(opts)
require.NoError(t, err)
expected := &Dependencies{}
_ = expected.UnmarshalText([]byte(`k6>0.54;k6/x/faker>0.4.0;k6/x/sql>=1.0.1;k6/x/sql/driver/ramsql*`))

expected, err := Analyze(opts)
actual, err := Analyze(opts)
require.NoError(t, err)

require.Equal(t, expected.String(), actual.String())
}
7 changes: 3 additions & 4 deletions cmd/cmd.go
@@ -92,16 +92,15 @@ func deps(opts *options, args []string) error {
}

if opts.input != "" && !ignoreStdin {
buffer := &bytes.Buffer{}
buffer.ReadFrom(os.Stdin) //nolint:errcheck,forbidigo,gosec

switch opts.input {
case "js":
buffer := &bytes.Buffer{}
buffer.ReadFrom(os.Stdin) //nolint:errcheck,forbidigo,gosec
opts.Script.Name = "stdin"
opts.Script.Contents = buffer.Bytes()
case "tar":
opts.Archive.Name = "stdin"
opts.Archive.Contents = buffer.Bytes()
opts.Archive.Reader = os.Stdin //nolint:forbidigo
default:
return fmt.Errorf("unsupported input format: %s", opts.input)
}
62 changes: 0 additions & 62 deletions options.go
@@ -1,7 +1,6 @@
package k6deps

import (
"bytes"
"errors"
"io"
"os"
@@ -31,28 +30,6 @@ func (s *Source) IsEmpty() bool {
return len(s.Contents) == 0 && s.Reader == nil && len(s.Name) == 0
}

func (s *Source) getReader() (io.Reader, func() error, error) {
if s.Reader != nil {
return s.Reader, nil, nil
}

if len(s.Contents) > 0 {
return bytes.NewReader(s.Contents), nil, nil
}

fileName, err := filepath.Abs(s.Name)
if err != nil {
return nil, nil, err
}

file, err := os.Open(filepath.Clean(fileName)) //nolint:forbidigo
if err != nil {
return nil, nil, err
}

return file, file.Close, nil
}

// Options contains the parameters of the dependency analysis.
type Options struct {
// Script contains the properties of the k6 test script to be analyzed.
@@ -99,10 +76,6 @@ func (opts *Options) lookupEnv(key string) (string, bool) {
}

func loadSources(opts *Options) error {
if !opts.Archive.Ignore && !opts.Archive.IsEmpty() {
return loadArchive(opts)
}

if err := loadManifest(opts); err != nil {
return err
}
@@ -214,38 +187,3 @@ func findManifest(filename string) ([]byte, string, bool, error) {

return nil, "", false, nil
}

//nolint:forbidigo
func loadArchive(opts *Options) error {
if opts.Archive.Ignore || opts.Archive.IsEmpty() {
return nil
}

reader, closer, err := opts.Archive.getReader()
if err != nil {
return err
}
if closer != nil {
defer closer() //nolint:errcheck
}

dir, err := os.MkdirTemp("", "k6deps-*")
if err != nil {
return err
}

defer os.RemoveAll(dir) //nolint:errcheck

err = extractArchive(dir, reader)
if err != nil {
return err
}

// archive should be self contained
opts.Script.Ignore = true
opts.Archive.Ignore = true
opts.Env.Ignore = true
opts.Manifest.Ignore = true

return loadMetadata(dir, opts)
}